<a href="https://colab.research.google.com/github/PunitTak2005/CPP-GFG/blob/main/Untitled1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib
from flask import Flask, request, jsonify, render_template
import os

# Define the Flask app for deployment
app = Flask(__name__)

# 1. Data Loading and Preprocessing
def load_and_preprocess_data(file_path):
    """Loads and preprocesses the data."""
    # Check if the dataset exists
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Dataset not found at path: {file_path}")

    # Load data
    data = pd.read_csv(file_path)

    # Inspect the first few rows and data types of the dataset
    print("Dataset loaded successfully. Here are the first few rows:")
    print(data.head())
    print("Data types:\n", data.dtypes)

    # Handle missing values: fill numeric columns with mean and categorical columns with mode
    for column in data.columns:
        if data[column].dtype in [np.float64, np.int64]:  # Numeric columns
            data[column].fillna(data[column].mean(), inplace=True)
        else:  # Categorical columns
            data[column].fillna(data[column].mode()[0], inplace=True)

    # Separate features and target variable
    X = data.drop('poverty_status', axis=1)
    y = data['poverty_status']

    # Encoding categorical features (one-hot encoding)
    X = pd.get_dummies(X, drop_first=True)

    return X, y

# 2. Model Training
def train_model(X, y):
    """Trains a machine learning model and returns the trained model."""
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print("Model Accuracy:", accuracy_score(y_test, y_pred))
    print("Classification Report:\n", classification_report(y_test, y_pred))
    # Save the trained model
    joblib.dump(model, 'poverty_model.pkl')
    return model

# 3. Load and Predict with the Model
def predict_poverty(input_data, model):
    """Predicts poverty status based on input data."""
    prediction = model.predict(input_data)
    return prediction

# 4. API Endpoint for Prediction
@app.route('/predict', methods=['POST'])
def api_predict():
    """API endpoint to predict poverty status based on user data."""
    data = request.json  # JSON input from the request
    input_df = pd.DataFrame(data, index=[0])  # Convert JSON to DataFrame
    model = joblib.load('poverty_model.pkl')
    # Ensure the input data is preprocessed in the same way as the training data
    input_df = pd.get_dummies(input_df, drop_first=True)
    prediction = predict_poverty(input_df, model)
    return jsonify({'prediction': int(prediction[0])})

# 5. Home Page for the Web App
@app.route('/')
def home():
    return render_template('index.html')  # Ensure 'index.html' is in the templates folder

# 6. Main Function
if __name__ == "__main__":
    # Load and preprocess data
    try:
        X, y = load_and_preprocess_data('/content/dataset.csv')
        # Train the model
        model = train_model(X, y)
        # Run Flask app
        app.run(debug=True)
    except Exception as e:
        print("An error occurred:", e)


Dataset loaded successfully. Here are the first few rows:
  Region_Code  Population  Average_Income  Unemployment_Rate  \
0        R001       16795            2367               7.13   
1        R002        1860            2152              18.14   
2        R003       77820            3027               5.44   
3        R004       55886            3695              12.95   
4        R005        7265            2495               0.01   

  Access_to_Clean_Water  Healthcare_Facilities  \
0                   Yes                      7   
1                   Yes                     12   
2                   Yes                      0   
3                   Yes                     15   
4                   Yes                      6   

   Distance_to_Nearest_Hospital_km  Literacy_Rate  School_Enrollment_Rate  \
0                            19.46          95.55                   68.25   
1                             0.54          91.13                   74.57   
2                        

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data[column].fillna(data[column].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data[column].fillna(data[column].mean(), inplace=True)
