In [69]:
# Import required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [70]:
# Read the country-wise education-visa table from the Excel workbook.
# NOTE(review): the filename contains a space before ".xlsx" — confirm it
# matches the file on disk.
data_file = "education_visa_countrywise_cleaned .xlsx"
data = pd.read_excel(data_file)

# The "2015" and "2016" columns are the targets. The feature matrix is every
# remaining column except the country label.
target_columns = ["2015", "2016"]
y = data[target_columns]
X = data.drop(columns=["Country of Citizenship", *target_columns])

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Standardise the features. The scaler's statistics come from the training
# rows only and are then applied unchanged to the test rows.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)




In [71]:
# Multi-target baseline: MultiOutputClassifier fits one independent copy of
# the base estimator per target column.
from sklearn.multioutput import MultiOutputClassifier

# NOTE(review): "2015" and "2016" are used directly as class labels. If these
# columns hold counts rather than categories, almost every value becomes its
# own class and accuracy will be very low (the recorded run shows
# 0.125 / 0.25) — confirm whether a regressor or binned labels were intended.

# Seed the tree so repeated runs give the same fitted model and scores; an
# unseeded tree can pick differently among equally good splits on each run.
base_model = DecisionTreeClassifier(random_state=42)
multi_target_model = MultiOutputClassifier(base_model)

# Train the multi-target model
multi_target_model.fit(X_train, y_train)

# predict() returns a plain array. Wrap it with the test set's column names
# and row index so each prediction lines up with its row in y_test.
y_test = pd.DataFrame(y_test, columns=["2015", "2016"])
y_pred = pd.DataFrame(
    multi_target_model.predict(X_test),
    columns=y_test.columns,
    index=y_test.index,
)

# Exact-match accuracy, reported separately for each target column.
results = {
    "2015 Accuracy": accuracy_score(y_test["2015"], y_pred["2015"]),
    "2016 Accuracy": accuracy_score(y_test["2016"], y_pred["2016"]),
}
print("Multi-target evaluation results:", results)


Multi-target evaluation results: {'2015 Accuracy': 0.125, '2016 Accuracy': 0.25}


In [72]:
# Collapse the two-column target into a single label per row.
# NOTE(review): "2015" / "2016" are not one-hot indicator columns, so argmax
# does not decode a class here. It yields 0 when the "2015" value is at least
# as large as the "2016" value and 1 otherwise. Confirm this is the label the
# comparison below is meant to predict.
# np.asarray makes the result a plain 1-D array whether the targets arrive as
# a DataFrame or as an ndarray. The guard makes the cell safe to re-run once
# the targets are already 1-D.
if len(y_train.shape) > 1 and y_train.shape[1] > 1:
    y_train = np.argmax(np.asarray(y_train), axis=1)
    y_test = np.argmax(np.asarray(y_test), axis=1)

# Print shapes to confirm both targets are now 1-D
print(f"y_train shape: {y_train.shape}")
print(f"y_test shape: {y_test.shape}")

# Candidate classifiers. The estimators that use randomness during fitting
# (Perceptron shuffles the training data, the tree breaks split ties) are
# seeded so the comparison table is reproducible.
models = {
    "Logistic Regression": LogisticRegression(),
    "Perceptron": Perceptron(random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Naive Bayes": GaussianNB(),
}

# One averaging and zero-division policy for all three metrics, so that
# precision, recall and F1 are mutually consistent. zero_division=0 scores a
# class that is never predicted as 0 instead of crediting it with perfect
# precision, and it silences the undefined-metric warnings.
metric_kwargs = {"average": "weighted", "zero_division": 0}

# Train each model on the training split and score it on the held-out split.
results = []
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, **metric_kwargs)
    recall = recall_score(y_test, y_pred, **metric_kwargs)
    f1 = f1_score(y_test, y_pred, **metric_kwargs)

    results.append({"Model": name, "Accuracy": accuracy, "Precision": precision, "Recall": recall, "F1-score": f1})

# Display results in a tabular format
results_df = pd.DataFrame(results)
print(results_df)

y_train shape: (92,)
y_test shape: (40,)
                 Model  Accuracy  Precision  Recall  F1-score
0  Logistic Regression     0.750   0.815789   0.750  0.679654
1           Perceptron     0.600   0.657576   0.600  0.616000
2        Decision Tree     0.675   0.652688   0.675  0.660048
3          Naive Bayes     0.525   0.816129   0.525  0.507982
