<a href="https://colab.research.google.com/github/BERGzie/unemployed_indian_DataModel/blob/main/Untitled1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import pandas as pd
import numpy as np
import logging
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import joblib

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')

def load_and_prepare_data(filepath):
    logging.info("Loading dataset...")
    df = pd.read_csv(filepath)

    # Drop rows with missing values
    df.dropna(inplace=True)
    logging.info(f"Data shape after dropping NAs: {df.shape}")

    # --- Create a dummy target column ---
    # Example logic: if Age < 30 and no experience → more likely unemployed
    df['Target'] = ((df['Age'] < 30) & (df['Previous Experience'] == 0)).astype(int)

    # Encode categorical columns (e.g., Name, Degree)
    for col in df.select_dtypes(include='object').columns:
        df[col] = LabelEncoder().fit_transform(df[col])
        logging.info(f"Encoded column: {col}")

    # Separate features and target
    X = df.drop('Target', axis=1)
    y = df['Target']

    # Scale features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Save scaler for production use
    joblib.dump(scaler, 'scaler.pkl')

    # Train-test split
    return train_test_split(X_scaled, y, test_size=0.2, random_state=42)

def build_model(input_dim):
    model = Sequential([
        Dense(128, activation='relu', input_dim=input_dim),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(1, activation='sigmoid')  # For binary classification
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

def run_training(X_train, X_test, y_train, y_test):
    model = build_model(X_train.shape[1])
    logging.info("Training the model...")

    early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    model.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=50,
        batch_size=32,
        callbacks=[early_stop],
        verbose=1
    )

    # Save model
    model.save("unemployment_model.h5")
    logging.info("Model saved as 'unemployment_model.h5'")

    # Evaluate
    loss, accuracy = model.evaluate(X_test, y_test)
    logging.info(f"Test Accuracy: {accuracy:.4f}")

if __name__ == "__main__":
    filepath = "unemployed_indian_dataset.csv"
    X_train, X_test, y_train, y_test = load_and_prepare_data(filepath)
    run_training(X_train, X_test, y_train, y_test)


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 21ms/step - accuracy: 0.9358 - loss: 0.4637 - val_accuracy: 1.0000 - val_loss: 0.1322
Epoch 2/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 1.0000 - loss: 0.0924 - val_accuracy: 1.0000 - val_loss: 0.0244
Epoch 3/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 1.0000 - loss: 0.0192 - val_accuracy: 1.0000 - val_loss: 0.0091
Epoch 4/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 1.0000 - loss: 0.0087 - val_accuracy: 1.0000 - val_loss: 0.0049
Epoch 5/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 1.0000 - loss: 0.0049 - val_accuracy: 1.0000 - val_loss: 0.0031
Epoch 6/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 1.0000 - loss: 0.0032 - val_accuracy: 1.0000 - val_loss: 0.0021
Epoch 7/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━



[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 1.0000 - loss: 2.2979e-05 
