In [1]:
# train_model.py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib
import os

# ------------------------------------------------------
# CONFIGURATION
# ------------------------------------------------------
CSV_PATH = "Financial_inclusion_dataset.csv"  # dataset file name
TARGET_COL = "bank_account"                   # change if target differs
MODEL_OUT = "fi_pipeline.joblib"
SAMPLE_OUT = "sample_input.csv"
RANDOM_STATE = 42
TEST_SIZE = 0.2
# ------------------------------------------------------

def main():
    # 1. Load dataset
    df = pd.read_csv(CSV_PATH)
    print("✅ Loaded dataset:", df.shape)
    print(df.head())

    # 2. Check missing values and data types
    print("\n--- Dataset Info ---")
    print(df.info())

    # 3. Drop duplicates
    before = len(df)
    df.drop_duplicates(inplace=True)
    print(f"Dropped {before - len(df)} duplicate rows.")

    # 4. Ensure target column exists
    if TARGET_COL not in df.columns:
        raise ValueError(f"Target column '{TARGET_COL}' not found! Please update TARGET_COL.")

    # 5. Split features and target
    X = df.drop(columns=[TARGET_COL])
    y = df[TARGET_COL]

    # Encode target if it’s categorical (e.g. “Yes”/“No”)
    if y.dtype == 'object':
        y = y.map({'Yes': 1, 'No': 0}).fillna(0).astype(int)

    # 6. Detect numeric and categorical columns
    numeric_cols = X.select_dtypes(include=['int64', 'float64']).columns.tolist()
    categorical_cols = X.select_dtypes(exclude=['int64', 'float64']).columns.tolist()
    print(f"Numeric columns: {len(numeric_cols)} | Categorical columns: {len(categorical_cols)}")

    # 7. Preprocessing pipelines
    numeric_pipeline = Pipeline([
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())
    ])

    categorical_pipeline = Pipeline([
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('encoder', OneHotEncoder(handle_unknown='ignore', sparse=False))
    ])

    preprocessor = ColumnTransformer([
        ('num', numeric_pipeline, numeric_cols),
        ('cat', categorical_pipeline, categorical_cols)
    ])

    # 8. Full model pipeline
    model = Pipeline([
        ('preprocessor', preprocessor),
        ('classifier', RandomForestClassifier(
            n_estimators=200,
            random_state=RANDOM_STATE,
            n_jobs=-1
        ))
    ])

    # 9. Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=y
    )

    # 10. Train model
    print("\n🔄 Training model ...")
    model.fit(X_train, y_train)
    print("✅ Model training complete.")

    # 11. Evaluate
    preds = model.predict(X_test)
    acc = accuracy_score(y_test, preds)
    print("\n--- Evaluation ---")
    print(f"Accuracy: {acc:.4f}")
    print(classification_report(y_test, preds))

    cm = confusion_matrix(y_test, preds)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title(f"Confusion Matrix (Accuracy = {acc:.3f})")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.tight_layout()
    plt.savefig("confusion_matrix.png")
    print("📊 Saved confusion_matrix.png")

    


In [7]:
!python train_model.py



python: can't open file 'c:\\Users\\Rafimbi\\Downloads\\RydaMtaani\\train_model.py': [Errno 2] No such file or directory


In [13]:
pip install pandas numpy matplotlib seaborn scikit-learn ydata-profiling streamlit joblib


Note: you may need to restart the kernel to use updated packages.


In [14]:
# File paths
DATA_PATH = "Financial_inclusion_dataset.csv"
MODEL_OUT = "financial_model.pkl"
SAMPLE_OUT = "sample_input.csv"

In [15]:
def main():
    print("📥 Loading dataset...")
    df = pd.read_csv(DATA_PATH)
    print(df.head())

In [18]:
# Basic info
print("\n🔍 Dataset Info:")
  


🔍 Dataset Info:


In [22]:
# Basic info
import pandas as pd
import os

if not os.path.exists(DATA_PATH):
	print(f"❌ File not found: {DATA_PATH}")
else:
	df = pd.read_csv(DATA_PATH)
	print(df.info())
   

❌ File not found: Financial_inclusion_dataset.csv


In [23]:
# Path to your dataset
DATA_PATH = "Financial_inclusion_dataset.csv"

In [25]:
   print("\n🔹 First 5 rows:")


🔹 First 5 rows:


In [34]:
!python train_model.py


python: can't open file 'c:\\Users\\Rafimbi\\Downloads\\RydaMtaani\\train_model.py': [Errno 2] No such file or directory


In [35]:
# app.py
import streamlit as st
import pandas as pd
import joblib

In [37]:
import os

# Load model
if os.path.exists(MODEL_OUT):
	model = joblib.load(MODEL_OUT)
	print("✅ Model loaded successfully.")
else:
	print(f"❌ Model file not found: {MODEL_OUT}")

❌ Model file not found: financial_model.pkl


In [38]:
st.title("💰 Financial Inclusion Predictor")

2025-10-16 19:32:16.464 
  command:

    streamlit run C:\Users\Rafimbi\AppData\Roaming\Python\Python313\site-packages\ipykernel_launcher.py [ARGUMENTS]


DeltaGenerator()

In [39]:
st.write("Enter demographic details to predict if an individual has a bank account.")



In [43]:
print(os.getcwd()) 

c:\Users\Rafimbi\Downloads\RydaMtaani


In [44]:
print(os.listdir())

['.git', 'financial_inclusion_model.pkl', 'RydaMtaani', 'third.txt']


In [45]:
import pandas as pd

In [47]:
df = pd.read_csv(r"C:\Users\Rafimbi\Downloads\Financial_inclusion_dataset.csv")


In [48]:
sample = df.sample(1, random_state=42)

In [49]:
sample.to_csv("sample_input.csv", index=False)

In [50]:
print("✅ Sample input created successfully: sample_input.csv")

✅ Sample input created successfully: sample_input.csv


In [51]:
print(sample)

     country  year      uniqueid bank_account location_type cellphone_access  \
6692  Rwanda  2016  uniqueid_625           No         Rural              Yes   

      household_size  age_of_respondent gender_of_respondent  \
6692               7                 40                 Male   

     relationship_with_head           marital_status      education_level  \
6692      Head of Household  Married/Living together  No formal education   

                 job_type  
6692  Informally employed  


In [54]:
cd "C:\Users\Rafimbi\Documents\streamlit"


C:\Users\Rafimbi\Documents\streamlit


In [56]:
!streamlit run app.py


Usage: streamlit run [OPTIONS] TARGET [ARGS]...
Try 'streamlit run --help' for help.

Error: Invalid value: File does not exist: app.py
