In [1]:
%%writefile app.py
import streamlit as st
import plotly.express as px
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load dataset
@st.cache_data  # Cache the dataset for better performance
def load_data(path='/content/CPIndex_Jan13-To-Jan25.csv'):
    """Read the CPI CSV and return a cleaned DataFrame.

    The file is read with every column as text, the stray 'Unnamed: 10' /
    'Unnamed: 11' columns are dropped when present, and 'Year' is parsed as a
    number; rows whose 'Year' cannot be parsed are removed.

    The 'Rural', 'Urban' and 'Combined' columns are parsed as numbers as well.
    Left as text they would be picked up by the label-encoding step that runs
    on every object column, which replaces each value by its rank in
    lexicographic string order instead of the value itself.

    Parameters
    ----------
    path : str
        Location of the CSV file. Defaults to the original Colab path.

    Returns
    -------
    pandas.DataFrame
        Cleaned frame; entries in the parsed columns that are blank or not
        numeric are NaN.
    """
    raw = pd.read_csv(path, dtype=str)

    # Drop unnecessary columns (ignored when the file does not contain them)
    cleaned = raw.drop(columns=['Unnamed: 10', 'Unnamed: 11'], errors='ignore')

    # Convert 'Year' column to numeric
    cleaned['Year'] = pd.to_numeric(cleaned['Year'], errors='coerce')

    # NOTE(review): assumes these three columns hold numeric index values --
    # they are the regression targets offered in the UI. Confirm against the CSV.
    for col in ('Rural', 'Urban', 'Combined'):
        if col in cleaned.columns:
            cleaned[col] = pd.to_numeric(cleaned[col].str.strip(), errors='coerce')

    # Drop rows with missing values in the 'Year' column
    return cleaned.dropna(subset=['Year'])

df = load_data()

# Debugging aid: show a sample of the loaded rows and the column names
st.write("Dataset Preview:")
st.write(df.head())
st.write("Columns in Dataset:")
st.write(list(df.columns))

# Replace every text (object-dtype) column by integer codes.
# The same encoder object is re-fitted for each column in turn.
label = LabelEncoder()
categorical_cols = df.select_dtypes(include=['object']).columns
for col in categorical_cols:
    encoded_values = label.fit_transform(df[col])
    df[col] = encoded_values

# Function to train model and return predictions
def predictor(feature, test_ratio=0.3):
    """Fit a random-forest regressor for one column and return held-out predictions.

    All other columns of the module-level ``df`` are used as inputs. Rows where
    ``feature`` is missing are excluded before the data is split.

    Parameters
    ----------
    feature : str
        Name of the column in ``df`` to predict.
    test_ratio : float, optional
        Fraction of rows held out for testing (default 0.3, the value that was
        previously hard-coded).

    Returns
    -------
    pandas.DataFrame
        Columns "Year", "Actual" and "Predicted" for the test rows, ordered by
        "Year" so that a line chart over "Year" is drawn left to right instead
        of in the shuffled order produced by the split.

    Raises
    ------
    ValueError
        If ``feature`` is not a column of ``df``.
    """
    if feature not in df.columns:
        raise ValueError(f"Column '{feature}' not found in DataFrame.")

    X = df.drop(columns=[feature])
    y = df[feature].dropna()
    # Keep only the input rows that still have a target value
    X = X.loc[y.index]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_ratio, random_state=42
    )

    # The scaler is fitted on the training rows only and then applied to both splits
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train_scaled, y_train)

    y_pred = model.predict(X_test_scaled)

    results = pd.DataFrame({"Year": X_test["Year"], "Actual": y_test, "Predicted": y_pred})
    return results.sort_values("Year")

# Streamlit UI
st.title("Inflation Prediction Using CPI")

# Let the user pick which column the model should predict
choice = st.selectbox('Select the target feature', options=['Rural', 'Urban', 'Combined'])

# Train, then show the prediction table and an actual-vs-predicted chart.
# A ValueError raised anywhere in this section is shown as an error box.
try:
    results_df = predictor(choice)
    st.write("Prediction Results:")
    st.write(results_df)

    value_axis_label = f"{choice} Value"

    # Plot using Plotly Express
    fig = px.line(
        results_df,
        x="Year",
        y=["Actual", "Predicted"],
        markers=True,
        labels={"value": value_axis_label, "variable": "Legend"},
        title=f"Actual vs Predicted {choice} Values Over Time",
    )

    # Update axis labels
    fig.update_layout(xaxis_title="Year", yaxis_title=value_axis_label)

    # Display in Streamlit
    st.plotly_chart(fig)

except ValueError as err:
    st.error(str(err))

Writing app.py


In [1]:
%%writefile app.py
import streamlit as st
import plotly.express as px
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load dataset
@st.cache_data
def load_data(path='/content/CPIndex_Jan13-To-Jan25.csv'):
    """Read the CPI CSV and return a cleaned DataFrame.

    The file is read with every column as text, the stray 'Unnamed: 10' /
    'Unnamed: 11' columns are dropped when present, and 'Year' is parsed as a
    number; rows whose 'Year' cannot be parsed are removed.

    The 'Rural', 'Urban' and 'Combined' columns are parsed as numbers as well.
    Left as text they would be picked up by the label-encoding step that runs
    on every object column, which replaces each value by its rank in
    lexicographic string order instead of the value itself.

    Parameters
    ----------
    path : str
        Location of the CSV file. Defaults to the original Colab path.

    Returns
    -------
    pandas.DataFrame
        Cleaned frame; entries in the parsed columns that are blank or not
        numeric are NaN.
    """
    raw = pd.read_csv(path, dtype=str)

    # Drop the unnamed filler columns (ignored when the file does not contain them)
    cleaned = raw.drop(columns=['Unnamed: 10', 'Unnamed: 11'], errors='ignore')

    cleaned['Year'] = pd.to_numeric(cleaned['Year'], errors='coerce')

    # NOTE(review): assumes these three columns hold numeric index values --
    # they are the regression targets offered in the UI. Confirm against the CSV.
    for col in ('Rural', 'Urban', 'Combined'):
        if col in cleaned.columns:
            cleaned[col] = pd.to_numeric(cleaned[col].str.strip(), errors='coerce')

    # Rows without a usable year are discarded
    return cleaned.dropna(subset=['Year'])

df = load_data()

# Page title and a look at the data exactly as loaded
st.title("Inflation Prediction Using CPI")
st.subheader("📄 Raw Dataset Preview")
st.write(df.head())
st.write("Columns in Dataset:")
st.write(list(df.columns))

# Replace every text (object-dtype) column by integer codes.
# The same encoder object is re-fitted for each column in turn.
label = LabelEncoder()
categorical_cols = df.select_dtypes(include=['object']).columns
for col in categorical_cols:
    encoded_values = label.fit_transform(df[col])
    df[col] = encoded_values

# The same preview again, now after encoding
st.subheader("🧹 Preprocessed Dataset")
st.write(df.head())

# Feature importance function
def show_feature_importance(model, X):
    """Return a table of feature importances, most important first.

    Parameters
    ----------
    model
        Fitted estimator exposing ``feature_importances_``.
    X : pandas.DataFrame
        Frame whose column names label the importances, in the same order.

    Returns
    -------
    pandas.DataFrame
        Columns 'Feature' and 'Importance', sorted by 'Importance' descending.
    """
    table = pd.DataFrame({'Feature': X.columns, 'Importance': model.feature_importances_})
    ranked = table.sort_values(by='Importance', ascending=False)
    return ranked

# Prediction function with metrics
def predictor(feature, test_ratio):
    """Fit a random-forest regressor for one column and evaluate it on a hold-out split.

    All other columns of the module-level ``df`` are used as inputs. Rows where
    ``feature`` is missing are excluded before the data is split.

    Parameters
    ----------
    feature : str
        Name of the column in ``df`` to predict.
    test_ratio : float
        Passed to ``train_test_split`` as ``test_size``.

    Returns
    -------
    tuple
        ``(result_df, importance_df, metrics)``: the test rows' "Year",
        "Actual" and "Predicted" values sorted by "Year"; the feature
        importance table; and a dict with keys "MAE", "MSE" and "R² Score".

    Raises
    ------
    ValueError
        If ``feature`` is not a column of ``df``.
    """
    if feature not in df.columns:
        raise ValueError(f"Column '{feature}' not found in DataFrame.")

    target = df[feature].dropna()
    # Inputs are restricted to the rows that still have a target value
    inputs = df.drop(columns=[feature]).loc[target.index]

    X_train, X_test, y_train, y_test = train_test_split(
        inputs, target, test_size=test_ratio, random_state=42
    )

    # The scaler is fitted on the training rows only and then applied to both splits
    scaler = StandardScaler()
    train_scaled = scaler.fit_transform(X_train)
    test_scaled = scaler.transform(X_test)

    forest = RandomForestRegressor(n_estimators=100, random_state=42)
    forest.fit(train_scaled, y_train)
    predictions = forest.predict(test_scaled)

    # Evaluation metrics on the held-out rows
    metrics = {
        "MAE": mean_absolute_error(y_test, predictions),
        "MSE": mean_squared_error(y_test, predictions),
        "R² Score": r2_score(y_test, predictions),
    }

    result_df = pd.DataFrame({
        "Year": X_test["Year"].values,
        "Actual": y_test.values,
        "Predicted": predictions,
    })

    importance_df = show_feature_importance(forest, inputs)

    return result_df.sort_values("Year"), importance_df, metrics

# User selection
choice = st.selectbox('🎯 Select the target feature', options=['Rural', 'Urban', 'Combined'])

# (display name, held-out fraction) for every train/test split that is reported
SPLITS = [("70/30", 0.3), ("80/20", 0.2), ("90/10", 0.1)]

# One model is trained per split. A ValueError raised anywhere in this section
# is shown as an error box.
try:
    for position, (split_name, ratio) in enumerate(SPLITS):
        results, feature_importance, metrics = predictor(choice, test_ratio=ratio)

        if position == 0:
            # The importance table comes from the first (70/30) fit; reusing that
            # fit avoids training an identical model a second time just for this table.
            st.subheader("📊 Feature Importance")
            st.write(feature_importance)

        st.subheader(f"📈 Results for {split_name} Train-Test Split")
        st.write(results)
        st.write("🔍 Metrics:", metrics)
        fig = px.line(results, x="Year", y=["Actual", "Predicted"], markers=True,
                      title=f"{split_name}: Actual vs Predicted {choice}")
        st.plotly_chart(fig)

except ValueError as e:
    st.error(str(e))


Writing app.py


In [None]:
# Start the Streamlit server in the background, then expose port 8501 through localtunnel.
# `--yes` lets npx install localtunnel without stopping at the interactive
# "Ok to proceed? (y)" prompt, so the cell does not wait for keyboard input.
!streamlit run app.py & npx --yes localtunnel --port 8501 --subdomain aditya


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.

  You can now view your Streamlit app in your browser.

  Local URL: http://localhost:8501
  Network URL: http://172.28.0.12:8501
  External URL: http://34.142.208.236:8501

Need to install the following packages:
localtunnel@2.0.2
Ok to proceed? (y) y

your url is: https://aditya.loca.lt


In [3]:
# `%pip` installs into the environment of the running kernel; the version is
# pinned to the one this notebook was run with so that re-runs are reproducible.
%pip install streamlit==1.44.1

Collecting streamlit
  Downloading streamlit-1.44.1-py3-none-any.whl.metadata (8.9 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
     44.3/44.3 kB 3.2 MB/s eta 0:00:00
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.44.1-py3-none-any.whl (9.8 MB)
   9.8/9.8 MB 115.6 MB/s eta 0:00:00
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
   6.9/6.9 MB 120.4 MB/s eta 0:00:00
Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
   79.1/79.1 kB 8.7 MB/s eta 0:00:00
[?25hIn