# Intern Performance Prediction Model (Colab Ready)
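This notebook generates a synthetic intern dataset, trains a Random Forest regression model to predict each intern's performance score, evaluates it on a held-out test set (MSE and R²), and plots the model's feature importances.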

In [ ]:
# Install (if needed)
!pip -q install xgboost

In [ ]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

## Create Sample Dataset

In [ ]:
# Seed NumPy's global generator so the synthetic data is identical on every run.
np.random.seed(42)
n = 120  # number of synthetic intern records

# Draw each feature in turn; the order of these calls fixes how the random
# stream is consumed, so it must stay: normal, uniform, uniform, randint.
intern_ids = range(1001, 1001 + n)
task_time = np.round(np.random.normal(6, 1.5, n).clip(2, 10), 2)
feedback_rating = np.round(np.random.uniform(2.5, 5.0, n), 2)
attendance = np.round(np.random.uniform(60, 100, n), 2)
tasks_completed = np.random.randint(5, 25, n)  # upper bound is exclusive: 5..24

df = pd.DataFrame({
    'Intern_ID': intern_ids,
    'Task_Time': task_time,
    'Feedback_Rating': feedback_rating,
    'Attendance': attendance,
    'Tasks_Completed': tasks_completed,
})

# Target: a weighted sum of the four features (lower Task_Time scores higher),
# then bounded to [40, 98] and rounded to a whole number.
weighted_score = (
    0.3 * (100 - df['Task_Time'] * 8)
    + 0.3 * (df['Feedback_Rating'] * 20)
    + 0.2 * (df['Attendance'])
    + 0.2 * (df['Tasks_Completed'] * 3)
)
df['Performance_Score'] = weighted_score.clip(40, 98).round(0)

df.head()

## Train Model

In [ ]:
# Feature matrix and regression target; Intern_ID is deliberately left out of the features.
feature_cols = ['Task_Time', 'Feedback_Rating', 'Attendance', 'Tasks_Completed']
X = df[feature_cols]
y = df['Performance_Score']

# Hold out 20% of the rows for evaluation; fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a 100-tree random forest on the training portion.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Score the held-out rows.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print('MSE:', mse)
print('R2 Score:', r2)

## Feature Importance

In [ ]:
# Per-feature importances of the fitted forest, in the same order as X's columns.
importance = model.feature_importances_

# Explicit figure/axes interface, with axis labels so the chart stands on its own.
fig, ax = plt.subplots()
ax.bar(X.columns, importance)
ax.set_title('Feature Importance')
ax.set_xlabel('Feature')
ax.set_ylabel('Importance')
fig.tight_layout()  # keep the longer feature names from being clipped
plt.show()