In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import RandomOverSampler
import os

# ---------------------------------------------------------------------------
# Load the survey sheet and encode it for modelling.
# ---------------------------------------------------------------------------
file_path = "project_final.xlsx"
df = pd.read_excel(file_path, sheet_name='project')

# Rows without a livelihood label cannot serve as a training target.  Drop
# them *before* fitting any encoder so that every encoder only learns classes
# that are still present in the rows actually used for modelling (otherwise
# the encoded labels can end up non-contiguous and the report's target names
# can refer to classes that no longer exist).
df.dropna(subset=['Livelihood Type'], inplace=True)

# Yes/No survey answers -> 1/0.  Any other value is mapped to NaN by
# Series.map.
binary_cols = ['Food', 'Shelter', 'Clothes', 'Electricity Access', 'Access to Healthcare']
for col in binary_cols:
    df[col] = df[col].map({'Yes': 1, 'No': 0})

# The encoders are kept at module level because classify_input() needs them
# to turn predicted codes back into the original labels.
le_need = LabelEncoder()
df['Need Level'] = le_need.fit_transform(df['Need Level'])

le_livelihood = LabelEncoder()
df['Livelihood Type'] = le_livelihood.fit_transform(df['Livelihood Type'])

le_income = LabelEncoder()
df['Income Level'] = le_income.fit_transform(df['Income Level'])

# Non-numeric wage entries are coerced to NaN and then replaced by the column
# median.  The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on the selected Series: that chained form operates
# on an intermediate object, triggers a FutureWarning, and stops updating `df`
# under pandas copy-on-write / pandas 3.0.
wages = pd.to_numeric(df['Daily or Monthly wages'], errors='coerce')
df['Daily or Monthly wages'] = wages.fillna(wages.median())

# ---------------------------------------------------------------------------
# Feature/target selection for the two classifiers.
# ---------------------------------------------------------------------------
X_need = df[binary_cols]
y_need = df['Need Level']

X_livelihood = df[['No. of members', 'No. of working people', 'Income Level', 'Daily or Monthly wages', 'No. of people educated']]
y_livelihood = df['Livelihood Type']

# Split BEFORE oversampling.  RandomOverSampler duplicates minority-class
# rows; if it runs on the full data set first, copies of the same row land in
# both the training and the test split and the test score is inflated by
# leakage.  Only the training portion is rebalanced; the test split keeps the
# original class distribution.
X_train_need, X_test_need, y_train_need, y_test_need = train_test_split(
    X_need, y_need, test_size=0.2, random_state=42
)
X_train_livelihood, X_test_livelihood, y_train_livelihood, y_test_livelihood = train_test_split(
    X_livelihood, y_livelihood, test_size=0.2, random_state=42
)

ros = RandomOverSampler(random_state=42)
# Oversampled *training* data for the need-level model.
X_need_resampled, y_need_resampled = ros.fit_resample(X_train_need, y_train_need)

# ---------------------------------------------------------------------------
# Need-level classifier.
# ---------------------------------------------------------------------------
clf_need = XGBClassifier(n_estimators=200, max_depth=5, learning_rate=0.1, random_state=42)
clf_need.fit(X_need_resampled, y_need_resampled)

y_pred_need = clf_need.predict(X_test_need)
# Passing `labels` explicitly keeps the report aligned with `target_names`
# even when a class happens to be absent from the (un-resampled) test split;
# without it classification_report raises on the length mismatch.
need_labels = list(range(len(le_need.classes_)))
print("Basic Needs Classification Accuracy:", accuracy_score(y_test_need, y_pred_need))
print("Basic Needs Classification Report:\n", classification_report(y_test_need, y_pred_need, labels=need_labels, target_names=le_need.classes_))

# ---------------------------------------------------------------------------
# Livelihood-type classifier (trained on the un-resampled training split).
# ---------------------------------------------------------------------------
clf_livelihood = XGBClassifier(n_estimators=200, max_depth=5, learning_rate=0.1, random_state=42)
clf_livelihood.fit(X_train_livelihood, y_train_livelihood)

y_pred_livelihood = clf_livelihood.predict(X_test_livelihood)
livelihood_labels = list(range(len(le_livelihood.classes_)))
print("Livelihood Type Classification Accuracy:", accuracy_score(y_test_livelihood, y_pred_livelihood))
print("Livelihood Type Classification Report:\n", classification_report(y_test_livelihood, y_pred_livelihood, labels=livelihood_labels, target_names=le_livelihood.classes_))

def _prompt_number(prompt, cast=int, allowed=None):
    """Ask for a number on stdin until the reply is valid.

    Args:
        prompt: Text shown to the user.
        cast: Callable used to parse the reply (``int`` or ``float``).
        allowed: Optional container of acceptable values; when given, any
            parsed value outside it is rejected and the user is asked again.

    Returns:
        The parsed value.
    """
    while True:
        reply = input(prompt)
        try:
            value = cast(reply)
        except ValueError:
            print(f"Invalid input {reply!r}; please enter a number.")
            continue
        if allowed is not None and value not in allowed:
            print(f"Invalid input {reply!r}; allowed values: {list(allowed)}.")
            continue
        return value


def classify_input():
    """Interactively classify one household and log the result to CSV.

    Reads the five basic-need flags and the five livelihood features from
    stdin, predicts the need level with ``clf_need`` and the livelihood type
    with ``clf_livelihood``, prints both decoded labels and appends a row
    with the inputs and predictions to ``predictions.csv`` (the header is
    written only when the file does not exist yet).

    Invalid replies are re-prompted instead of raising ``ValueError``.
    """
    yes_no = (0, 1)

    print("\nEnter values to test classification:")
    food = _prompt_number("Food (1 for Yes, 0 for No): ", allowed=yes_no)
    shelter = _prompt_number("Shelter (1 for Yes, 0 for No): ", allowed=yes_no)
    clothes = _prompt_number("Clothes (1 for Yes, 0 for No): ", allowed=yes_no)
    electricity = _prompt_number("Electricity (1 for Yes, 0 for No): ", allowed=yes_no)
    healthcare = _prompt_number("Access to Healthcare (1 for Yes, 0 for No): ", allowed=yes_no)
    members = _prompt_number("No. of Members: ")
    working_people = _prompt_number("No. of Working People: ")

    # The model consumes the LabelEncoder code for the income level, so show
    # the code -> label mapping; otherwise the user has no way to know it.
    income_codes = dict(enumerate(le_income.classes_))
    print("Income Level codes:", income_codes)
    income = _prompt_number("Income Level (encoded value): ", allowed=range(len(income_codes)))

    wages = _prompt_number("Daily or Monthly Wages: ", cast=float)
    educated_people = _prompt_number("No. of People Educated: ")

    # Both models were fitted on DataFrames, so predict on DataFrames that
    # carry the same column names in the same order rather than on bare
    # lists; this keeps the feature alignment explicit and does not depend on
    # how the installed xgboost version treats unnamed list input.
    need_features = pd.DataFrame(
        [[food, shelter, clothes, electricity, healthcare]],
        columns=binary_cols,
    )
    livelihood_features = pd.DataFrame(
        [[members, working_people, income, wages, educated_people]],
        columns=X_livelihood.columns,
    )

    need_pred = clf_need.predict(need_features)[0]
    livelihood_pred = clf_livelihood.predict(livelihood_features)[0]

    # Convert the predicted integer codes back to the original labels.
    need_pred_label = le_need.inverse_transform([need_pred])[0]
    livelihood_pred_label = le_livelihood.inverse_transform([livelihood_pred])[0]

    print("Predicted Need Level:", need_pred_label)
    print("Predicted Livelihood Type:", livelihood_pred_label)

    new_data = pd.DataFrame([{
        'Food': food,
        'Shelter': shelter,
        'Clothes': clothes,
        'Electricity Access': electricity,
        'Access to Healthcare': healthcare,
        'No. of Members': members,
        'No. of Working People': working_people,
        'Income Level': income,
        'Daily or Monthly Wages': wages,
        'No. of People Educated': educated_people,
        'Predicted Need Level': need_pred_label,
        'Predicted Livelihood Type': livelihood_pred_label
    }])
    csv_filename = "predictions.csv"
    # Append mode creates the file when it is missing; emit the header row
    # only in that case so repeated runs build up one well-formed CSV.
    write_header = not os.path.exists(csv_filename)
    new_data.to_csv(csv_filename, mode='a', header=write_header, index=False)
    print(f"Results saved to {csv_filename}")


classify_input()


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Daily or Monthly wages'].fillna(df['Daily or Monthly wages'].median(), inplace=True)  # Replace NaN with median


Basic Needs Classification Accuracy: 1.0
Basic Needs Classification Report:
                precision    recall  f1-score   support

    High Need       1.00      1.00      1.00        11
     Low Need       1.00      1.00      1.00         7
Moderate Need       1.00      1.00      1.00        12

     accuracy                           1.00        30
    macro avg       1.00      1.00      1.00        30
 weighted avg       1.00      1.00      1.00        30

Livelihood Type Classification Accuracy: 0.3333333333333333
Livelihood Type Classification Report:
               precision    recall  f1-score   support

 Agriculture       0.50      0.38      0.43        13
    Business       0.29      0.33      0.31         6
       Labor       0.00      0.00      0.00         2

    accuracy                           0.33        21
   macro avg       0.26      0.24      0.25        21
weighted avg       0.39      0.33      0.36        21


Enter values to test classification:
Food (1 for Yes,