# In [None]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# Load the three source tables.
students_df = pd.read_csv('students.csv')  # student_id, age, gender, etc.
courses_df = pd.read_csv('courses.csv')    # course_id, course_name, syllabus_length, etc.
performance_df = pd.read_csv('performance.csv')  # student_id, course_id, grade, etc.

# Build one row per performance record, enriched with student and course
# attributes. Left joins keep every performance row even when the student or
# course lookup finds no match; the unmatched attributes come through as NaN.
interaction_df = pd.merge(performance_df, students_df, on='student_id', how='left')
interaction_df = pd.merge(interaction_df, courses_df, on='course_id', how='left')

# Target variable: 1 when the grade reaches the pass mark, otherwise 0.
# The comparison is vectorized; a missing grade compares as False and so is
# labelled 0, exactly as the previous row-wise lambda did.
PASSING_GRADE = 50
interaction_df['interaction'] = (interaction_df['grade'] >= PASSING_GRADE).astype(int)

# Feature engineering: fill the gaps in the student attributes.
# NOTE(review): the mean age is computed over all rows, i.e. before the
# train/test split below, so the test rows influence the imputed value.
interaction_df['age'] = interaction_df['age'].fillna(interaction_df['age'].mean())
interaction_df['gender'] = interaction_df['gender'].fillna('unknown')

# Encode the categorical gender column as integer codes.
label_encoder = LabelEncoder()
interaction_df['gender'] = label_encoder.fit_transform(interaction_df['gender'])

# Select the features for training.
# 'grade' is deliberately NOT a feature: the target is computed directly from
# it, so including it would leak the label into X and make any evaluation on
# the held-out split meaningless.
features = ['age', 'gender', 'syllabus_length']  # You can add more features like past course interactions
X = interaction_df[features]
y = interaction_df['interaction']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
