In [3]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression  # Since Passed is binary
from sklearn.metrics import accuracy_score, confusion_matrix

# --- Step 1: Load and preprocess data ---
# The code below relies on dummy_data.csv providing Gender, Passed and City columns.
df = pd.read_csv("dummy_data.csv")

# Work on a copy so the raw frame `df` stays untouched.
df_label = df.copy()

# Use one encoder per column. Re-fitting a single LabelEncoder on "Passed"
# overwrites the classes it learned for "Gender", so the Gender mapping could
# no longer be inspected (gender_encoder.classes_) or reused for new samples.
gender_encoder = LabelEncoder()
df_label['Gender_encoded'] = gender_encoder.fit_transform(df_label["Gender"])

# `le` keeps its original name and, exactly as before, ends up fitted on "Passed".
le = LabelEncoder()
df_label['Passed_encoded'] = le.fit_transform(df_label["Passed"])

# One-hot encode City. dtype=int emits 0/1 integers directly, so no separate
# astype(int) pass over the dummy columns is needed.
df_encoded = pd.get_dummies(df_label, columns=['City'], dtype=int)
city_columns = [col for col in df_encoded.columns if col.startswith('City_')]

print("✅ Final preprocessed data:")
print(df_encoded)

# --- Step 2: Prepare features and target ---
# Features are everything except the raw text columns ('Name', 'Gender',
# 'Passed') and the target column itself ('Passed_encoded').
non_feature_columns = ['Name', 'Gender', 'Passed', 'Passed_encoded']
y = df_encoded['Passed_encoded']  # Target variable (0/1)
X = df_encoded.drop(columns=non_feature_columns)

# --- Step 3: Split data ---
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# --- Step 4: Train the model ---
# fit() returns the estimator itself, so construction and training can be chained.
model = LogisticRegression().fit(X_train, y_train)

# --- Step 5: Make predictions ---
# Predict labels for the held-out rows only.
y_pred = model.predict(X_test)

# --- Step 6: Evaluate the model ---
# Compare held-out predictions against the true labels.
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

print("\n✅ Model Accuracy:", accuracy)
print("Confusion Matrix:\n", cm)

# --- Step 7: Predict for new data ---
# Example: a new male student from Kathmandu.
# LabelEncoder numbers classes in sorted order, so for this dataset
# Female=0 and Male=1 (consistent with the Gender_encoded column printed above).
new_student = pd.DataFrame({
    'Gender_encoded': [1],
    'City_Biratnagar': [0],
    'City_Butwal': [0],
    'City_Kathmandu': [1],
    'City_Lalitpur': [0],
    'City_Pokhara': [0]
})

# Fail loudly on a misspelled or unknown feature name; the reindex below would
# otherwise silently discard it.
unknown_columns = [col for col in new_student.columns if col not in X.columns]
if unknown_columns:
    raise ValueError(
        f"Unknown feature column(s) for prediction: {unknown_columns}"
    )

# Align to the exact columns (and order) the model was trained on. Any training
# feature not listed above (e.g. a city that only exists in the CSV) becomes 0.
new_student = new_student.reindex(columns=X.columns, fill_value=0)

new_prediction = model.predict(new_student)
print("\nPredicted Passed (0=No, 1=Yes):", new_prediction[0])


✅ Final preprocessed data:
       Name  Gender Passed  Gender_encoded  Passed_encoded  City_Biratnagar  \
0     Aarav    Male    Yes               1               1                0   
1      Sita  Female     No               0               0                0   
2     Kiran  Female     No               0               0                0   
3      Ravi  Female     No               0               0                0   
4    Anjali    Male    Yes               1               1                1   
5    Manish  Female    Yes               0               1                0   
6      Puja    Male    Yes               1               1                1   
7     Suman    Male     No               1               0                0   
8     Nabin    Male     No               1               0                0   
9   Kritika    Male     No               1               0                0   
10  Prakash  Female     No               0               0                0   
11   Sunita    Male     N