<a href="https://colab.research.google.com/github/2300080003/secondproject/blob/main/Deepika.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report

# ------------------------------
# Step 1: Generate Dummy Dataset
# ------------------------------
# Toy internship records, written one student per row. The last field of
# every row ('Success') is the label the models are trained to predict.
student_fields = [
    'GPA',
    'Attendance',
    'Skills_Matched',
    'Projects_Completed',
    'Hours_Per_Week',
    'Success',
]
student_rows = [
    (8.5, 90, 'Yes', 3, 15, 1),
    (7.2, 85, 'No', 1, 10, 0),
    (6.0, 70, 'Yes', 2, 5, 0),
    (9.0, 95, 'Yes', 4, 20, 1),
    (5.8, 60, 'No', 0, 6, 0),
    (7.9, 88, 'Yes', 3, 18, 1),
    (6.5, 75, 'No', 1, 7, 0),
    (8.1, 92, 'Yes', 4, 19, 1),
    (7.5, 80, 'No', 2, 9, 0),
    (5.9, 65, 'No', 1, 8, 0),
]
data = pd.DataFrame(student_rows, columns=student_fields)

# Show the raw table before any preprocessing touches it.
print("\nSample Data:\n", data)

# ------------------------------
# Step 2: Data Preprocessing
# ------------------------------
# Convert Yes/No to binary
data['Skills_Matched'] = data['Skills_Matched'].map({'Yes': 1, 'No': 0})

# Feature matrix and label
X = data.drop('Success', axis=1)
y = data['Success']

# Train/Test split comes BEFORE imputing and scaling: the column means and
# standard deviations must be learned from the training rows only, otherwise
# information about the held-out rows leaks into the features the models see.
# stratify=y keeps both outcome classes present in each partition, which an
# unstratified split cannot guarantee on a dataset this small.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Handle missing values (if any) using means computed on the training rows
imputer = SimpleImputer(strategy='mean')
X_train_imputed = imputer.fit_transform(X_train_raw)
X_test_imputed = imputer.transform(X_test_raw)

# Scale features with statistics computed on the training rows
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_imputed)
X_test = scaler.transform(X_test_imputed)

# Full feature matrix pushed through the already-fitted transformers; kept so
# that any later code referring to X_scaled keeps working.
X_scaled = scaler.transform(imputer.transform(X))

# ------------------------------
# Step 3: Train ML Models
# ------------------------------
# Each classifier is fitted on the training partition and then predicts the
# held-out rows; the *_pred arrays are what the evaluation step scores.

# Logistic Regression
log_model = LogisticRegression()
log_model.fit(X_train, y_train)
log_pred = log_model.predict(X_test)

# Random Forest
# The forest draws random bootstrap samples and feature subsets, so the seed
# is fixed to make its predictions (and the printed report) repeatable on
# every run of the notebook.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)

# Support Vector Machine
svm_model = SVC()
svm_model.fit(X_train, y_train)
svm_pred = svm_model.predict(X_test)

# ------------------------------
# Step 4: Evaluate Models
# ------------------------------
# Pair every report heading with the predictions it describes, then print
# the per-class precision/recall/F1 table for each model in the same order.
model_reports = (
    ("\n=== Logistic Regression Report ===", log_pred),
    ("\n=== Random Forest Report ===", rf_pred),
    ("\n=== SVM Report ===", svm_pred),
)
for report_heading, model_predictions in model_reports:
    print(report_heading)
    print(classification_report(y_test, model_predictions))



Sample Data:
    GPA  Attendance Skills_Matched  Projects_Completed  Hours_Per_Week  Success
0  8.5          90            Yes                   3              15        1
1  7.2          85             No                   1              10        0
2  6.0          70            Yes                   2               5        0
3  9.0          95            Yes                   4              20        1
4  5.8          60             No                   0               6        0
5  7.9          88            Yes                   3              18        1
6  6.5          75             No                   1               7        0
7  8.1          92            Yes                   4              19        1
8  7.5          80             No                   2               9        0
9  5.9          65             No                   1               8        0

=== Logistic Regression Report ===
              precision    recall  f1-score   support

           0       1.00  