In [None]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
import matplotlib.pyplot as plt




In [None]:

# Read the project CSV into the notebook-wide DataFrame `df`.
# NOTE(review): the location below is an absolute, machine-specific Windows
# path, so this cell only runs where that exact file exists.
dataset_path = r'C:\Users\matha\OneDrive\Desktop\movie\project_data.csv'
df = pd.read_csv(filepath_or_buffer=dataset_path)



In [None]:

# Discard every row that has at least one missing value (df is modified in place).
df.dropna(axis=0, how='any', inplace=True)

# Swap the values of COLLEGE, BRANCH and CATEGORY for integer codes, keeping
# each fitted encoder by column name so the codes can be mapped back later.
label_encoders = {}
for column in ('COLLEGE', 'BRANCH', 'CATEGORY'):
    le = LabelEncoder()
    codes = le.fit_transform(df[column])
    df[column] = codes
    label_encoders[column] = le


In [None]:

def create_eligibility_column(df, student_rank):
    """Return a copy of ``df`` with a binary ``eligible`` target column.

    A row gets ``eligible == 1`` when ``student_rank`` lies inside that row's
    inclusive ``[OPENING_RANK, CLOSING_RANK]`` interval, and ``0`` otherwise.

    The frame passed in is not modified: the result is built with
    ``DataFrame.assign``, so calling this repeatedly (for example with
    different ranks) never alters the caller's data behind its back.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``OPENING_RANK`` and ``CLOSING_RANK``.
    student_rank : int
        Rank to test against each row's opening/closing interval.

    Returns
    -------
    pandas.DataFrame
        New frame with every column of ``df`` plus ``eligible`` (an existing
        ``eligible`` column is replaced).

    Raises
    ------
    KeyError
        If ``OPENING_RANK`` or ``CLOSING_RANK`` is missing from ``df``.
    """
    # Both bounds are inclusive, matching `opening <= rank <= closing`.
    within_range = df['OPENING_RANK'].le(student_rank) & df['CLOSING_RANK'].ge(student_rank)
    return df.assign(eligible=within_range.astype(int))

# Prompt for the rank to evaluate (int() raises ValueError on non-numeric
# input), then label every row of df against that rank.
rank_text = input("Enter rank : ")
student_rank = int(rank_text)
df = create_eligibility_column(df=df, student_rank=student_rank)



In [None]:
# Feature matrix and target vector.
# NOTE(review): 'eligible' is computed from OPENING_RANK and CLOSING_RANK,
# which are also used as features here, so the reported accuracy may be
# optimistic -- confirm this is intended.
feature_columns = ['NIRF', 'COLLEGE', 'YEAR', 'BRANCH', 'CATEGORY', 'OPENING_RANK', 'CLOSING_RANK']
X = df[feature_columns]
y = df['eligible']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a decision tree on the training portion, then predict the held-out rows.
model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred = model.predict(X_test)


In [None]:
# Score the held-out predictions: overall accuracy plus the per-class report.
report = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

print(f'Accuracy: {accuracy:.2f}')
print('Classification Report:', report, sep='\n')

In [None]:
def get_top_5_colleges(student_rank, community, *, top_n=5, data=None, encoders=None):
    """Return the best-ranked college/branch options open to a student.

    Rows are kept when their CATEGORY matches ``community`` and
    ``student_rank`` falls inside the inclusive
    ``[OPENING_RANK, CLOSING_RANK]`` interval. The survivors are sorted by
    ascending ``NIRF`` (assumed: a smaller NIRF value means a better-ranked
    college -- confirm) and the first ``top_n`` rows are returned with
    COLLEGE and BRANCH decoded back to their original labels.

    Parameters
    ----------
    student_rank : int
        Rank to test against each row's opening/closing interval.
    community : str
        Category label exactly as it appeared before encoding (e.g. 'GENERAL').
    top_n : int, keyword-only, default 5
        Maximum number of rows to return. The default matches the function's
        name and the "Top 5" wording used where the result is printed.
    data : pandas.DataFrame, keyword-only, optional
        Encoded table to search. Defaults to the notebook-level ``df``.
    encoders : dict, keyword-only, optional
        Mapping with keys 'COLLEGE', 'BRANCH' and 'CATEGORY' whose values
        offer ``transform`` / ``inverse_transform``. Defaults to the
        notebook-level ``label_encoders``.

    Returns
    -------
    pandas.DataFrame
        Columns NIRF, COLLEGE, BRANCH, OPENING_RANK, CLOSING_RANK; at most
        ``top_n`` rows, possibly none.

    Raises
    ------
    ValueError
        If ``top_n`` is negative. The CATEGORY encoder may also raise when
        ``community`` was not seen during fitting (scikit-learn's
        LabelEncoder raises ValueError in that case).
    """
    if top_n < 0:
        # head() with a negative count means "all but the last n rows",
        # which is never what a caller asking for the top rows wants.
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Fall back to the notebook-level table and encoders when none are given.
    frame = df if data is None else data
    fitted = label_encoders if encoders is None else encoders

    # CATEGORY is stored as integer codes, so translate the label first.
    community_code = fitted['CATEGORY'].transform([community])[0]

    in_community = frame['CATEGORY'] == community_code
    rank_in_range = (frame['OPENING_RANK'] <= student_rank) & (student_rank <= frame['CLOSING_RANK'])

    best = frame[in_community & rank_in_range].sort_values(by='NIRF').head(top_n)

    # assign() builds a new frame, so decoding never writes into a slice of
    # the source table (no SettingWithCopyWarning, no accidental mutation).
    best = best.assign(
        COLLEGE=fitted['COLLEGE'].inverse_transform(best['COLLEGE']),
        BRANCH=fitted['BRANCH'].inverse_transform(best['BRANCH']),
    )

    return best[['NIRF', 'COLLEGE', 'BRANCH', 'OPENING_RANK', 'CLOSING_RANK']]

In [None]:

# Look up recommendations for the rank entered earlier in the GENERAL
# category and print them beneath a short header.
top_5_colleges = get_top_5_colleges(student_rank, community='GENERAL')
print(
    f"Student Rank: {student_rank}",
    "Top 5 Colleges and Branches:",
    top_5_colleges,
    sep='\n',
)

In [None]:

# Draw the fitted decision tree on an explicit 20x10 inch figure.
fig, ax = plt.subplots(figsize=(20, 10))

# Build class names from the classes the model actually saw during fit, so a
# training set containing only one class is still labelled correctly.
class_labels = {0: 'Not Eligible', 1: 'Eligible'}

plot_tree(
    model,
    # Pass a plain list: some scikit-learn releases validate feature_names as
    # a list and reject a pandas Index.
    feature_names=list(X.columns),
    class_names=[class_labels[int(label)] for label in model.classes_],
    filled=True,
    rounded=True,
    ax=ax,
)
plt.show()