In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# Read the dataset from the CSV file in the working directory.
data = pd.read_csv('kaggle_pivot_min_descending.csv')

# One LabelEncoder per categorical column. They are kept as separate
# module-level objects so the fitted mappings can be reused later to encode
# new inputs and to turn predicted codes back into college names.
label_encoder_college = LabelEncoder()
label_encoder_score_type = LabelEncoder()
label_encoder_seat_type = LabelEncoder()
label_encoder_branch = LabelEncoder()

# Overwrite each categorical column in place with its integer codes.
for column_name, column_encoder in (
    ('college_name', label_encoder_college),
    ('score_type', label_encoder_score_type),
    ('seat_type', label_encoder_seat_type),
    ('branch', label_encoder_branch),
):
    data[column_name] = column_encoder.fit_transform(data[column_name])

# Model inputs: the three encoded categoricals followed by the numeric columns.
feature_columns = [
    'score_type', 'seat_type', 'branch',
    'sum', 'count', 'max', 'min', 'mean', 'max-min', 'max-mean',
]
X = data[feature_columns]
# Target: the encoded college identifier.
y = data['college_name']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Train model
# Seed the forest with the same value used for the train/test split so that the
# fitted model (and therefore the accuracy printed below and any pickled copy)
# is reproducible across "Restart & Run All". Without a seed, bootstrap sampling
# and feature sub-sampling differ on every run.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Evaluate model
# Plain accuracy of the predicted college codes against the held-out 20% split.
y_pred = model.predict(X_test)
print(f'Accuracy: {accuracy_score(y_test, y_pred)}')

# Predict for new data
def predict_college(score_type, seat_type, branch, sum_score, count, max_score, min_score, mean_score, max_min_diff, max_mean_diff):
    """Predict the college for a single row of feature values.

    The three categorical arguments are passed as raw labels and encoded with
    the module-level LabelEncoders fitted during preprocessing; the remaining
    arguments are used as given.

    Parameters
    ----------
    score_type, seat_type, branch
        Raw category labels, encoded via ``label_encoder_score_type``,
        ``label_encoder_seat_type`` and ``label_encoder_branch``.
    sum_score, count, max_score, min_score, mean_score, max_min_diff, max_mean_diff
        Values for the 'sum', 'count', 'max', 'min', 'mean', 'max-min' and
        'max-mean' feature columns, in that order.

    Returns
    -------
    The result of ``label_encoder_college.inverse_transform`` applied to the
    model's prediction: a one-element array holding the decoded college label.
    """
    # Column order must match the feature frame the model was trained on.
    feature_names = [
        'score_type', 'seat_type', 'branch',
        'sum', 'count', 'max', 'min', 'mean', 'max-min', 'max-mean',
    ]
    feature_values = [
        score_type, seat_type, branch,
        sum_score, count, max_score, min_score, mean_score,
        max_min_diff, max_mean_diff,
    ]
    input_row = pd.DataFrame([feature_values], columns=feature_names)

    # Map the raw labels to the integer codes learned during fitting.
    fitted_encoders = {
        'score_type': label_encoder_score_type,
        'seat_type': label_encoder_seat_type,
        'branch': label_encoder_branch,
    }
    for column_name, column_encoder in fitted_encoders.items():
        input_row[column_name] = column_encoder.transform(input_row[column_name])

    # The model outputs an encoded college id; decode it before returning.
    encoded_prediction = model.predict(input_row)
    return label_encoder_college.inverse_transform(encoded_prediction)

# Example prediction: call the helper with one set of raw (un-encoded) inputs.
# Keyword arguments make it explicit which value feeds which feature.
predicted_college = predict_college(
    score_type='MHT-CET',
    seat_type='TFWS',
    branch='Computer Engineering',
    sum_score=299.6381411,
    count=3,
    max_score=99.9132362,
    min_score=99.843394,
    mean_score=99.87938037,
    max_min_diff=0.0698422,
    max_mean_diff=0.033855833,
)
# predict_college returns a one-element array; show its only entry.
print(f'Predicted College: {predicted_college[0]}')


Accuracy: 0.04580690627202255
Predicted College: Veermata Jijabai Technological Institute(VJTI), Matunga, Mumbai


In [6]:
# Inspect the frame after preprocessing: college_name, score_type, seat_type and
# branch were overwritten in place with LabelEncoder integer codes in the cell
# above, so this view shows codes rather than the original strings.
data

Unnamed: 0,college_name,score_type,seat_type,branch,sum,count,max,min,mean,max-min,max-mean
0,302,1,76,17,299.638141,3,99.913236,99.843394,99.879380,0.069842,0.033856
1,302,1,48,17,699.371128,7,100.000000,99.829843,99.910161,0.170156,0.089839
2,302,1,24,17,1698.390955,17,99.995273,99.817615,99.905350,0.177659,0.089923
3,38,1,18,17,99.803612,1,99.803612,99.803612,99.803612,0.000000,0.000000
4,38,1,39,17,99.787304,1,99.787304,99.787304,99.787304,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...
28372,49,1,22,63,301.478995,8,57.441365,0.014461,37.684874,57.426904,19.756490
28373,213,1,23,63,10.207831,2,10.198231,0.009599,5.103915,10.188632,5.094316
28374,191,0,32,63,0.005895,1,0.005895,0.005895,0.005895,0.000000,0.000000
28375,13,1,49,40,0.004865,1,0.004865,0.004865,0.004865,0.000000,0.000000


In [7]:
import pickle

# Persist the trained classifier. The context manager guarantees the file is
# flushed and closed even if pickling raises; the previous bare open() call
# left the handle open until garbage collection.
# NOTE(review): only `model` is saved. The fitted LabelEncoders that
# predict_college relies on to encode inputs and decode predictions are not
# persisted here - confirm that consumers of model_1.pkl obtain them elsewhere.
with open('model_1.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)