<a href="https://colab.research.google.com/github/Aksharma127/Machine-Learning/blob/main/geo_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [105]:
# Required Libraries
import os

import joblib
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.models import Sequential

# Make sure the data folder is present, then point at the CSV inside it.
# Adjust file_path if the dataset lives somewhere else.
os.makedirs('data', exist_ok=True)
file_path = "data/sikkim_simulated_business_data.csv"

# Read the dataset, or fail fast with a readable hint when it is missing.
if os.path.exists(file_path):
    df = pd.read_csv(file_path)
else:
    print(f"Error: The file '{file_path}' was not found.")
    print("Please make sure the CSV file 'sikkim_simulated_business_data.csv' is in the 'data' directory.")
    raise FileNotFoundError(f"Required data file not found at {file_path}")
# Names of the four target columns (two numeric, two categorical)
target_score = "business_attractiveness_score"
target_profit = "estimated_yearly_profit_usd"
target_risk = "risk_level"
target_recommendation = "recommended_business_type"

# Input columns: numeric ones are scaled later, categorical ones are one-hot encoded
num = [
    'population_density_sqkm',
    'avg_household_income_usd',
    'median_age',
    'literacy_rate_percent',
    'crime_rate_index',
    'infrastructure_quality_score',
    'accessibility_score',
    'unemployment_rate_percent',
    'local_gdp_per_capita_usd',
]
cat = ['area_type']
features = [*num, *cat]

# Feature frame and the two numeric targets
X, y_score, y_profit = df[features], df[target_score], df[target_profit]

# Categorical targets become integer class ids; the fitted encoders are kept
# so the ids can be mapped back to their original labels later.
risk_le, rec_le = LabelEncoder(), LabelEncoder()
y_risk = risk_le.fit_transform(df[target_risk])
y_rec = rec_le.fit_transform(df[target_recommendation])

# Splits
# Split the raw rows BEFORE fitting the preprocessor so the held-out rows never
# influence the scaler statistics or the encoder categories (no data leakage).
# One call keeps the features and all four targets on the same row partition;
# with test_size=0.2 and random_state=42 this is the partition the four
# separate calls produced before.
(
    X_train_raw, X_test_raw,
    y_score_train, y_score_test,
    y_profit_train, y_profit_test,
    y_risk_train, y_risk_test,
    y_rec_train, y_rec_test,
) = train_test_split(
    X, y_score, y_profit, y_risk, y_rec, test_size=0.2, random_state=42
)

# Preprocessing
# handle_unknown="ignore" encodes a category that was absent from the training
# rows as all zeros instead of raising at transform time.
pre = ColumnTransformer([
    ("num", StandardScaler(), num),
    ("cat", OneHotEncoder(handle_unknown="ignore"), cat)
])
X_train = pre.fit_transform(X_train_raw)
X_test = pre.transform(X_test_raw)

# Full processed matrix, kept because build_model reads X_proc.shape[1]
X_proc = pre.transform(X)

# Save encoders
# Ensure the directory for saving models exists
os.makedirs('geo_model', exist_ok=True)
joblib.dump(pre, "geo_model/preprocessor.pkl")
joblib.dump(risk_le, "geo_model/risk_encoder.pkl")
joblib.dump(rec_le, "geo_model/rec_encoder.pkl")

# Build model factory
def build_model(output_units, output_activation, loss, task='regression', input_dim=None):
    """Create and compile a small fully connected Keras network.

    Architecture: Input -> Dense(128, relu) -> Dropout(0.3) -> Dense(64, relu)
    -> Dense(output_units, output_activation), compiled with the Adam optimizer.

    Args:
        output_units: Width of the final Dense layer.
        output_activation: Activation of the final Dense layer.
        loss: Loss passed to ``model.compile``.
        task: ``'classification'`` tracks accuracy; any other value tracks MSE.
        input_dim: Number of input features. When omitted, the column count of
            the notebook-level ``X_proc`` matrix is used, as before.

    Returns:
        The compiled ``Sequential`` model.
    """
    if input_dim is None:
        input_dim = X_proc.shape[1]

    # An explicit Input layer replaces `input_shape=` on the first Dense layer,
    # which newer Keras versions warn about for Sequential models.
    model = Sequential([
        Input(shape=(input_dim,)),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dense(output_units, activation=output_activation)
    ])
    # Use appropriate metrics based on the task
    metrics = ['accuracy'] if task == 'classification' else ['mse']
    model.compile(optimizer='adam', loss=loss, metrics=metrics)
    return model

# Regression heads: one linear output unit trained with mean squared error
score_model = build_model(output_units=1, output_activation='linear', loss='mse', task='regression')
profit_model = build_model(output_units=1, output_activation='linear', loss='mse', task='regression')

# Classification heads: one softmax unit per distinct encoded class id
n_risk_classes = np.unique(y_risk).size
n_rec_classes = np.unique(y_rec).size
risk_model = build_model(
    output_units=n_risk_classes,
    output_activation='softmax',
    loss='sparse_categorical_crossentropy',
    task='classification',
)
rec_model = build_model(
    output_units=n_rec_classes,
    output_activation='softmax',
    loss='sparse_categorical_crossentropy',
    task='classification',
)


# One EarlyStopping callback (patience 10, best weights restored) shared by every fit call
es = EarlyStopping(patience=10, restore_best_weights=True)

# Settings common to all four runs; verbose=0 keeps the training output short
fit_options = dict(epochs=100, validation_split=0.2, callbacks=[es], verbose=0)

# Train each model in turn on the shared training features
training_plan = (
    ("score", score_model, y_score_train),
    ("profit", profit_model, y_profit_train),
    ("risk", risk_model, y_risk_train),
    ("recommendation", rec_model, y_rec_train),
)
for label, net, targets in training_plan:
    print(f"Training {label} model...")
    net.fit(X_train, targets, **fit_options)


# Persist every trained network under geo_model/, creating the folder if needed
os.makedirs('geo_model', exist_ok=True)
trained_models = (
    ("score_model.h5", score_model),
    ("profit_model.h5", profit_model),
    ("risk_model.h5", risk_model),
    ("rec_model.h5", rec_model),
)
for file_name, trained in trained_models:
    trained.save("geo_model/" + file_name)

print(" All deep learning models trained and saved!")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training score model...
Training profit model...
Training risk model...
Training recommendation model...




 All deep learning models trained and saved!


In [106]:
!pip install streamlit ngrok




In [107]:
import folium

# Coordinates used for both the map centre and the marker
lat, lon = 27.5, 88.5  # Somewhere in Sikkim

m = folium.Map(location=[lat, lon], zoom_start=8)
marker = folium.Marker([lat, lon], tooltip="Your Prediction Point")
marker.add_to(m)

# Last expression of the cell: renders the map inline
m


In [108]:
!pip install pyngrok




In [109]:
# Starts app.py as a background shell job.
# NOTE(review): app.py is only written by the `%%writefile app.py` cell further
# down, and the tunnel cell near the end launches it again. The recorded output
# of this cell reports port 5000 as already in use, so confirm which of the two
# launches is meant to own the port.
!python3 app.py &

 * Serving Flask app 'app'
 * Debug mode: off
Address already in use
Port 5000 is in use by another program. Either identify and stop that program, or start the server with a different port.


In [128]:
import os

from pyngrok import conf, ngrok

# Take the token from the NGROK_AUTHTOKEN environment variable when it is set,
# so the real secret never has to be saved inside the notebook. The original
# placeholder is kept as the fallback.
conf.get_default().auth_token = os.environ.get("NGROK_AUTHTOKEN", "YOUR_AUTHTOKEN")


In [123]:
# Registers the auth token with the ngrok CLI; the recorded output shows it is
# written to /root/.config/ngrok/ngrok.yml.
# NOTE(review): YOUR_AUTHTOKEN looks like a placeholder - substitute the real
# token at run time and avoid committing it with the notebook.
!ngrok config add-authtoken YOUR_AUTHTOKEN


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [134]:
%%writefile app.py
from flask import Flask, request, jsonify
app = Flask(__name__)

@app.route('/')
def home():
    """Root endpoint: returns a fixed status message."""
    status_message = "✅ Geo Business Advisor Flask App is running!"
    return status_message

# Fields a prediction request must carry; they mirror the feature columns the
# notebook trains on and the keys of its example payload.
REQUIRED_FIELDS = (
    "population_density_sqkm",
    "avg_household_income_usd",
    "median_age",
    "literacy_rate_percent",
    "crime_rate_index",
    "infrastructure_quality_score",
    "accessibility_score",
    "unemployment_rate_percent",
    "local_gdp_per_capita_usd",
    "area_type",
)


@app.route('/predict', methods=['POST'])
def predict():
    """Validate the JSON request body and return a prediction payload.

    Returns:
        HTTP 400 with an ``error`` message when the body is not a JSON object
        or lacks any of ``REQUIRED_FIELDS``; otherwise the (currently
        hard-coded) prediction as JSON.
    """
    data = request.json

    # Reject malformed requests instead of answering them with a
    # plausible-looking prediction.
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400
    missing = [field for field in REQUIRED_FIELDS if field not in data]
    if missing:
        return jsonify({"error": "Missing required fields.", "missing_fields": missing}), 400

    # Dummy output — replace with real model inference
    return jsonify({
        "business_opportunity_score": 85,
        "estimated_profit_usd": 12000,
        "risk_level": "Medium",
        "recommended_business_type": "Cafe"
    })

# Start the Flask server on port 5000, but only when this file is executed
# directly rather than imported.
if __name__ == '__main__':
    app.run(port=5000)


Overwriting app.py


In [135]:
# Write each trained network to its .h5 file under geo_model/
models_by_stem = {
    "score_model": score_model,
    "profit_model": profit_model,
    "risk_model": risk_model,
    "rec_model": rec_model,
}
for stem, trained in models_by_stem.items():
    trained.save(f"geo_model/{stem}.h5")




In [136]:
from pyngrok import ngrok
public_url = ngrok.connect(5000)
print(f"🌐 Public URL: {public_url}")

!python3 app.py &


🌐 Public URL: NgrokTunnel: "https://7245-34-60-226-195.ngrok-free.app" -> "http://localhost:5000"
 * Serving Flask app 'app'
 * Debug mode: off
Address already in use
Port 5000 is in use by another program. Either identify and stop that program, or start the server with a different port.


In [140]:
import requests

# Target the /predict endpoint of the tunnel opened earlier in the notebook.
# The address is read from the live NgrokTunnel object instead of being pasted
# in, because a pasted ngrok hostname stops working once the tunnel is recreated.
url = f"{public_url.public_url}/predict"

payload = {
    "population_density_sqkm": 2100,
    "avg_household_income_usd": 9500,
    "median_age": 30,
    "literacy_rate_percent": 89,
    "crime_rate_index": 15,
    "infrastructure_quality_score": 75,
    "accessibility_score": 80,
    "unemployment_rate_percent": 6.5,
    "local_gdp_per_capita_usd": 3400,
    "area_type": "urban"
}

# A timeout keeps the cell from hanging forever if the tunnel or server is down;
# raise_for_status surfaces HTTP errors before the body is parsed as JSON.
response = requests.post(url, json=payload, timeout=30)
response.raise_for_status()
print(response.json())

{'business_opportunity_score': 85, 'estimated_profit_usd': 12000, 'recommended_business_type': 'Cafe', 'risk_level': 'Medium'}
