In [None]:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
import math
# Load the data
data = pd.read_csv('../data/Institute_yearly_data.csv')
data = data.sort_values(by=["Institute", "Year"])

predictions = []
future_years = range(2025, 2029)

# Generate predictions for each institute
for institute in data["Institute"].unique():
    institute_data = data[data["Institute"] == institute]
    years = institute_data["Year"].values
    y_min_opening = institute_data["min_opening_rank"].values
    y_max_closing = institute_data["max_closing_rank"].values

    # Fit ARIMA for min opening rank
    model_min_opening = ARIMA(y_min_opening, order=(1, 1, 1))
    model_min_opening_fit = model_min_opening.fit()

    # Fit ARIMA for max closing rank
    model_max_closing = ARIMA(y_max_closing, order=(1, 1, 1))
    model_max_closing_fit = model_max_closing.fit()

    # Forecasting future years
    forecast_min_opening = model_min_opening_fit.forecast(steps=len(future_years))
    forecast_max_closing = model_max_closing_fit.forecast(steps=len(future_years))

    for year, pred_min, pred_max in zip(future_years, forecast_min_opening, forecast_max_closing):
        predictions.append([institute, year, math.floor(pred_min), math.floor(pred_max)])

predictions_df = pd.DataFrame(predictions, columns=["Institute", "Year", "min_opening_rank", "max_closing_rank"])
combined_df = pd.concat([data, predictions_df], ignore_index=True).sort_values(by=["Institute", "Year"])
combined_df.to_csv("../data/prediction/combined_institute_predictions.csv", index=False)


In [None]:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
import math
# Load the data
data = pd.read_csv('../data/Gender_yearly_data.csv')
data = data.sort_values(by=["Gender", "Year"])

predictions = []
future_years = range(2025, 2029)

# Generate predictions for each Gender
for Gender in data["Gender"].unique():
    Gender_data = data[data["Gender"] == Gender]
    years = Gender_data["Year"].values
    y_min_opening = Gender_data["min_opening_rank"].values
    y_max_closing = Gender_data["max_closing_rank"].values

    # Fit ARIMA for min opening rank
    model_min_opening = ARIMA(y_min_opening, order=(1, 1, 1))
    model_min_opening_fit = model_min_opening.fit()

    # Fit ARIMA for max closing rank
    model_max_closing = ARIMA(y_max_closing, order=(1, 1, 1))
    model_max_closing_fit = model_max_closing.fit()

    # Forecasting future years
    forecast_min_opening = model_min_opening_fit.forecast(steps=len(future_years))
    forecast_max_closing = model_max_closing_fit.forecast(steps=len(future_years))

    for year, pred_min, pred_max in zip(future_years, forecast_min_opening, forecast_max_closing):
        predictions.append([Gender, year, math.floor(pred_min), math.floor(pred_max)])

predictions_df = pd.DataFrame(predictions, columns=["Gender", "Year", "min_opening_rank", "max_closing_rank"])
combined_df = pd.concat([data, predictions_df], ignore_index=True).sort_values(by=["Gender", "Year"])
combined_df.to_csv("../data/prediction/combined_Gender_predictions.csv", index=False)


In [34]:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
import math
# Load the data
data = pd.read_csv('../data/Academic_Program_Name_yearly_data.csv')
data = data.sort_values(by=["Academic_Program_Name", "Year"])

predictions = []
future_years = range(2025, 2029)

# Generate predictions for each Gender
for Academic_Program_Name in data["Academic_Program_Name"].unique():
    Academic_Program_Name_data = data[data["Academic_Program_Name"] == Academic_Program_Name]
    years = Academic_Program_Name_data["Year"].values
    y_min_opening = Academic_Program_Name_data["min_opening_rank"].values
    y_max_closing = Academic_Program_Name_data["max_closing_rank"].values

    # Fit ARIMA for min opening rank
    model_min_opening = ARIMA(y_min_opening, order=(1, 1, 1))
    model_min_opening_fit = model_min_opening.fit()

    # Fit ARIMA for max closing rank
    model_max_closing = ARIMA(y_max_closing, order=(1, 1, 1))
    model_max_closing_fit = model_max_closing.fit()

    # Forecasting future years
    forecast_min_opening = model_min_opening_fit.forecast(steps=len(future_years))
    forecast_max_closing = model_max_closing_fit.forecast(steps=len(future_years))

    for year, pred_min, pred_max in zip(future_years, forecast_min_opening, forecast_max_closing):
        print(Academic_Program_Name)
        predictions.append([Academic_Program_Name, year, math.floor(pred_min), math.floor(pred_max)])

predictions_df = pd.DataFrame(predictions, columns=["Academic_Program_Name", "Year", "min_opening_rank", "max_closing_rank"])
combined_df = pd.concat([data, predictions_df], ignore_index=True).sort_values(by=["Academic_Program_Name", "Year"])
combined_df.to_csv("../data/prediction/combined_Academic_Program_Name_predictions.csv", index=False)


0
0
0
0
1
1
1
1
2
2
2
2
3
3
3
3
4
4
4
4
5
5
5
5
6
6
6
6
7
7
7
7
8
8
8
8
9
9
9
9
10
10
10
10
11
11
11
11
12
12
12
12
13
13
13
13
14
14
14
14
15
15
15
15
16
16
16
16
17
17
17
17
18
18
18
18
19
19
19
19
20
20
20
20
21
21
21
21
22
22
22
22
23
23
23
23
24
24
24
24
25
25
25
25
26
26
26
26
27
27
27
27
28
28
28
28
29
29
29
29
30
30
30
30
31
31
31
31
32
32
32
32
33
33
33
33
34
34
34
34
35
35
35
35
36
36
36
36
37
37
37
37
38
38
38
38
41
41
41
41
42
42
42
42
43
43
43
43
44
44
44
44
45
45
45
45
46
46
46
46
47
47
47
47
48
48
48
48
49
49
49
49
50
50
50
50
51
51
51
51
52
52
52
52
53
53
53
53
54
54
54
54
55
55
55
55
56
56
56
56
57
57
57
57
58
58
58
58
59
59
59
59
60
60
60
60
61
61
61
61
62
62
62
62
63
63
63
63
64
64
64
64
65
65
65
65
66
66
66
66
67
67
67
67
68
68
68
68
69
69
69
69
70
70
70
70
71
71
71
71
72
72
72
72
73
73
73
73
74
74
74
74
75
75
75
75
76
76
76
76
77
77
77
77
78
78
78
78
79
79
79
79
80
80
80
80
81
81
81
81
82
82
82
82
83
83
83
83
84
84
84
84
85
85
85
85
86
86
86
86
87
87
87
87
88
88
88

  np.inner(score_obs, score_obs) /


100
100
100
100
101
101
101
101
102
102
102
102
103
103
103
103
104
104
104
104
105
105
105
105
106
106
106
106
108
108
108
108
109
109
109
109
110
110
110
110
111
111
111
111
112
112
112
112
113
113
113
113
114
114
114
114
115
115
115
115
116
116
116
116
117
117
117
117
118
118
118
118
119
119
119
119
120
120
120
120
121
121
121
121
122
122
122
122
123
123
123
123
124
124
124
124
125
125
125
125
126
126
126
126
127
127
127
127
128
128
128
128
129
129
129
129
130
130
130
130
131
131
131
131
132
132
132
132
133
133
133
133
134
134
134
134
135
135
135
135
136
136
136
136
137
137
137
137
138
138
138
138
139
139
139
139
140
140
140
140
141
141
141
141
142
142
142
142
143
143
143
143
144
144
144
144
145
145
145
145
