In [8]:
import pandas as pd
from sklearn.linear_model import LinearRegression
import numpy as np
import streamlit as st
import math

# Forecast min opening / max closing rank per institute for 2025-2028 and
# write the historical rows together with the forecasts to a single CSV.

# Load the yearly per-institute data.
data = pd.read_csv('../data/Institute_yearly_data.csv')

# Features and targets. Both feature columns are passed to the regression as
# plain numeric inputs.
# NOTE(review): this assumes "Institute" is already numerically encoded. A
# string label would make fit() raise, and an integer code is treated by the
# linear model as an ordinal quantity -- confirm this is intended.
feature_columns = ["Institute", "Year"]
X = data[feature_columns]
y_min_opening = data["min_opening_rank"]
y_max_closing = data["max_closing_rank"]

# One independent linear model per target (fit() returns the estimator).
model_min_opening = LinearRegression().fit(X, y_min_opening)
model_max_closing = LinearRegression().fit(X, y_max_closing)

# Build every (institute, future year) pair in institute-major order.
future_years = np.arange(2025, 2029)
future_institutes = data["Institute"].unique()
future_X = pd.DataFrame(
    {
        "Institute": np.repeat(future_institutes, len(future_years)),
        "Year": np.tile(future_years, len(future_institutes)),
    }
)[feature_columns]

# Score all future rows in one call. predict() receives a DataFrame carrying
# the same column names the models were fitted with, so scikit-learn's
# feature-name check passes and the values keep their own dtypes instead of
# being coerced into one mixed numpy array.
predictions_df = future_X.assign(
    min_opening_rank=model_min_opening.predict(future_X),
    max_closing_rank=model_max_closing.predict(future_X),
)

# Combine original data with predictions; ignore_index avoids duplicate index
# labels coming from the two source frames.
combined_df = pd.concat([data, predictions_df], ignore_index=True).sort_values(
    by=["Institute", "Year"]
)

# Ranks are whole numbers: round the (float) columns down and store as ints.
for rank_column in ("min_opening_rank", "max_closing_rank"):
    combined_df[rank_column] = np.floor(combined_df[rank_column]).astype("int64")

# Save to CSV
combined_df.to_csv("../data/prediction/combined_institute_predictions.csv", index=False)




In [9]:

# Forecast min opening / max closing rank per gender category for 2025-2028
# and write the historical rows together with the forecasts to a single CSV.

# Load the yearly per-gender data.
data = pd.read_csv('../data/Gender_yearly_data.csv')

# Features and targets. Both feature columns are passed to the regression as
# plain numeric inputs.
# NOTE(review): this assumes "Gender" is already numerically encoded. A
# string label would make fit() raise, and an integer code is treated by the
# linear model as an ordinal quantity -- confirm this is intended.
feature_columns = ["Gender", "Year"]
X = data[feature_columns]
y_min_opening = data["min_opening_rank"]
y_max_closing = data["max_closing_rank"]

# One independent linear model per target (fit() returns the estimator).
model_min_opening = LinearRegression().fit(X, y_min_opening)
model_max_closing = LinearRegression().fit(X, y_max_closing)

# Build every (gender, future year) pair in gender-major order.
future_years = np.arange(2025, 2029)
future_Genders = data["Gender"].unique()
future_X = pd.DataFrame(
    {
        "Gender": np.repeat(future_Genders, len(future_years)),
        "Year": np.tile(future_years, len(future_Genders)),
    }
)[feature_columns]

# Score all future rows in one call. predict() receives a DataFrame carrying
# the same column names the models were fitted with, so scikit-learn's
# feature-name check passes and the values keep their own dtypes instead of
# being coerced into one mixed numpy array.
predictions_df = future_X.assign(
    min_opening_rank=model_min_opening.predict(future_X),
    max_closing_rank=model_max_closing.predict(future_X),
)

# Combine original data with predictions; ignore_index avoids duplicate index
# labels coming from the two source frames.
combined_df = pd.concat([data, predictions_df], ignore_index=True).sort_values(
    by=["Gender", "Year"]
)

# Ranks are whole numbers: round the (float) columns down and store as ints.
for rank_column in ("min_opening_rank", "max_closing_rank"):
    combined_df[rank_column] = np.floor(combined_df[rank_column]).astype("int64")

# Save to CSV
combined_df.to_csv("../data/prediction/combined_Gender_predictions.csv", index=False)



In [10]:

# Forecast min opening / max closing rank per academic program for 2025-2028
# and write the historical rows together with the forecasts to a single CSV.

# Load the yearly per-program data.
data = pd.read_csv('../data/Academic_Program_Name_yearly_data.csv')

# Features and targets. Both feature columns are passed to the regression as
# plain numeric inputs.
# NOTE(review): this assumes "Academic_Program_Name" is already numerically
# encoded. A string label would make fit() raise, and an integer code is
# treated by the linear model as an ordinal quantity -- confirm this is
# intended.
feature_columns = ["Academic_Program_Name", "Year"]
X = data[feature_columns]
y_min_opening = data["min_opening_rank"]
y_max_closing = data["max_closing_rank"]

# One independent linear model per target (fit() returns the estimator).
model_min_opening = LinearRegression().fit(X, y_min_opening)
model_max_closing = LinearRegression().fit(X, y_max_closing)

# Build every (program, future year) pair in program-major order.
future_years = np.arange(2025, 2029)
future_Academic_Program_Names = data["Academic_Program_Name"].unique()
future_X = pd.DataFrame(
    {
        "Academic_Program_Name": np.repeat(
            future_Academic_Program_Names, len(future_years)
        ),
        "Year": np.tile(future_years, len(future_Academic_Program_Names)),
    }
)[feature_columns]

# Score all future rows in one call. predict() receives a DataFrame carrying
# the same column names the models were fitted with, so scikit-learn's
# feature-name check passes and the values keep their own dtypes instead of
# being coerced into one mixed numpy array.
predictions_df = future_X.assign(
    min_opening_rank=model_min_opening.predict(future_X),
    max_closing_rank=model_max_closing.predict(future_X),
)

# Combine original data with predictions; ignore_index avoids duplicate index
# labels coming from the two source frames.
combined_df = pd.concat([data, predictions_df], ignore_index=True).sort_values(
    by=["Academic_Program_Name", "Year"]
)

# Ranks are whole numbers: round the (float) columns down and store as ints.
for rank_column in ("min_opening_rank", "max_closing_rank"):
    combined_df[rank_column] = np.floor(combined_df[rank_column]).astype("int64")

# Save to CSV
combined_df.to_csv("../data/prediction/combined_Academic_Program_Name_predictions.csv", index=False)



