<a href="https://colab.research.google.com/github/CodeBlockFaiz/DATASCIENCE-Projects/blob/main/Mini_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 1) Install packages

In [1]:
!pip -q install streamlit pyngrok pandas numpy matplotlib scikit-learn torch statsmodels

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.5/207.5 MB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m188.7/188.7 MB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.1/21.1 MB[0m [31m95.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m112.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

## Config + utils

In [2]:
import os, warnings
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

warnings.filterwarnings("ignore")

# ==== Paths ====
ART_DIR = Path("imputed_outputs")
PLOT_DIR = Path("plots")
FORECAST_PLOT_DIR = Path("forecast_plots")
for d in [ART_DIR, PLOT_DIR, FORECAST_PLOT_DIR]:
    d.mkdir(exist_ok=True)

# ==== Your datasets ====
DATASETS = {
    "Bangalore": "Bangalore 2023-08-08 to 2025-08-08.csv",
    "Delhi":     "Delhi 2023-08-08 to 2025-08-08.csv",
}

# Columns
DATETIME_COL = "datetime"   # change if different in your CSVs
FEATURES = ["temp", "humidity", "windspeed", "winddir", "cloudcover", "dew", "precip"]

VAL_RATIO, TEST_RATIO = 0.1, 0.1

def train_val_test_split(df, val_ratio=0.1, test_ratio=0.1, sort_col="datetime"):
    if sort_col in df.columns:
        df = df.sort_values(sort_col)
    n = len(df)
    n_test = int(n * test_ratio)
    n_val = int(n * val_ratio)
    train = df.iloc[: n - n_val - n_test]
    val   = df.iloc[n - n_val - n_test : n - n_test]
    test  = df.iloc[n - n_test :]
    return train.copy(), val.copy(), test.copy()

def nrmse(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    rmse = np.sqrt(np.mean((y_true - y_pred)**2))
    denom = (y_true.max() - y_true.min()) or 1.0
    return float(rmse / denom)

def nmse(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    mse = np.mean((y_true - y_pred)**2)
    denom = np.mean((y_true - np.mean(y_true))**2) or 1.0
    return float(mse / denom)

## 3) Imputers (Mean/Median/KNN + CGAN)

In [3]:
from sklearn.impute import KNNImputer
import torch
import torch.nn as nn
import torch.optim as optim

# --- Simple statistical imputers ---
def mean_impute(df, features):
    out = df.copy()
    for c in features:
        out[c] = out[c].fillna(out[c].mean())
    return out

def median_impute(df, features):
    out = df.copy()
    for c in features:
        out[c] = out[c].fillna(out[c].median())
    return out

def knn_impute(df, features, n_neighbors=5):
    out = df.copy()
    imputer = KNNImputer(n_neighbors=n_neighbors)
    out[features] = imputer.fit_transform(out[features])
    return out

# --- CGAN Imputer (fixed/stable) ---
class CGANImputer:
    def __init__(self, features, noise_dim=8, hidden=64, lr=1e-3, treat_zeros_as_nan=True):
        self.features = features
        self.noise_dim = noise_dim
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.treat_zeros_as_nan = treat_zeros_as_nan

        D_in = len(features)

        class Gen(nn.Module):
            def __init__(self, D_in, noise_dim, hidden):
                super().__init__()
                self.net = nn.Sequential(
                    nn.Linear(D_in + noise_dim, hidden), nn.ReLU(),
                    nn.Linear(hidden, hidden), nn.ReLU(),
                    nn.Linear(hidden, D_in)
                )
            def forward(self, x, z):
                return self.net(torch.cat([x, z], dim=1))

        class Disc(nn.Module):
            def __init__(self, D_in, hidden):
                super().__init__()
                self.net = nn.Sequential(
                    nn.Linear(D_in * 2, hidden), nn.LeakyReLU(0.2),
                    nn.Linear(hidden, hidden), nn.LeakyReLU(0.2),
                    nn.Linear(hidden, 1), nn.Sigmoid()
                )
            def forward(self, x, x_hat):
                return self.net(torch.cat([x, x_hat], dim=1))

        self.G = Gen(D_in, noise_dim, hidden).to(self.device)
        self.D = Disc(D_in, hidden).to(self.device)
        self.g_opt = optim.Adam(self.G.parameters(), lr=lr)
        self.d_opt = optim.Adam(self.D.parameters(), lr=lr)
        self.mse = nn.MSELoss()

    def _prepare_data(self, df):
        X = df[self.features].values.astype(np.float32)
        if self.treat_zeros_as_nan:
            X[X == 0] = np.nan
        M = ~np.isnan(X)

        self.means = np.nanmean(X, axis=0, keepdims=True)
        self.stds  = np.nanstd(X, axis=0, keepdims=True) + 1e-6

        Xn = (X - self.means) / self.stds
        Xn = np.nan_to_num(Xn, nan=0.0)
        return Xn, M

    def fit(self, df, epochs=200, batch_size=64, verbose=False):
        Xn, M = self._prepare_data(df)
        dl = torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(
                torch.tensor(Xn, dtype=torch.float32),
                torch.tensor(M.astype(np.float32))
            ), batch_size=batch_size, shuffle=True
        )
        for ep in range(epochs):
            for xb, mb in dl:
                xb, mb = xb.to(self.device), mb.to(self.device)
                z = torch.randn((xb.size(0), self.noise_dim), device=self.device)

                # Generator pass
                x_hat = self.G(xb * mb, z)
                x_tilde = mb * xb + (1 - mb) * x_hat

                # Discriminator
                self.d_opt.zero_grad()
                real_score = self.D(xb, xb)
                fake_score = self.D(xb, x_tilde.detach())
                d_loss = -torch.mean(torch.log(real_score + 1e-6) + torch.log(1 - fake_score + 1e-6))
                d_loss.backward()
                self.d_opt.step()

                # Generator
                self.g_opt.zero_grad()
                fake_score = self.D(xb, x_tilde)
                adv_loss = -torch.mean(torch.log(fake_score + 1e-6))
                recon_loss = self.mse((1 - mb) * x_hat, (1 - mb) * xb)
                g_loss = adv_loss + recon_loss
                g_loss.backward()
                self.g_opt.step()

            if verbose and (ep+1) % max(1, epochs//10) == 0:
                print(f"Epoch {ep+1}/{epochs} D:{d_loss.item():.4f} G:{g_loss.item():.4f}")

    def transform(self, df):
        X = df[self.features].values.astype(np.float32)
        if self.treat_zeros_as_nan:
            X[X == 0] = np.nan
        M = ~np.isnan(X)

        Xn = (X - self.means) / self.stds
        Xn = np.nan_to_num(Xn, nan=0.0)

        with torch.no_grad():
            xb = torch.tensor(Xn, device=self.device)
            mb = torch.tensor(M.astype(np.float32), device=self.device)
            z = torch.randn((xb.size(0), self.noise_dim), device=self.device)
            x_hat = self.G(xb * mb, z)
            x_tilde = mb * xb + (1 - mb) * x_hat

        X_rec = x_tilde.cpu().numpy() * self.stds + self.means
        out = df.copy()
        out[self.features] = X_rec
        return out

## 4) Imputation loop (saves CSVs + plots with bigger imputed dots)

In [4]:
for city, filepath in DATASETS.items():
    print(f"\n=== Processing {city} ===")
    df = pd.read_csv(filepath)

    # Ensure datetime exists
    if DATETIME_COL in df.columns:
        df[DATETIME_COL] = pd.to_datetime(df[DATETIME_COL], errors="coerce")
    else:
        # try best-effort autodetect
        dt_col = [c for c in df.columns if "date" in c.lower() or "time" in c.lower()]
        if dt_col:
            df[DATETIME_COL] = pd.to_datetime(df[dt_col[0]], errors="coerce")
        else:
            df[DATETIME_COL] = pd.date_range("2023-01-01", periods=len(df), freq="D")

    # Keep only existing features
    feat = [f for f in FEATURES if f in df.columns]
    if not feat:
        raise ValueError(f"No expected features found in {filepath}. Found columns: {df.columns.tolist()}")

    print("NaNs before:", df[feat].isna().sum().to_dict())

    # Save baselines (optional)
    mean_impute(df, feat).to_csv(ART_DIR / f"{city}_imputed_mean.csv", index=False)
    median_impute(df, feat).to_csv(ART_DIR / f"{city}_imputed_median.csv", index=False)
    if len(feat) >= 2:
        knn_impute(df, feat, n_neighbors=5).to_csv(ART_DIR / f"{city}_imputed_knn.csv", index=False)

    # CGAN imputation
    cgan = CGANImputer(feat, treat_zeros_as_nan=True)
    cgan.fit(df, epochs=200, verbose=True)
    df_cgan = cgan.transform(df)
    df_cgan.to_csv(ART_DIR / f"{city}_imputed_cgan.csv", index=False)

    print("NaNs after (CGAN):", df_cgan[feat].isna().sum().to_dict())

    # Plots
    time = df[DATETIME_COL]
    for f in feat:
        plt.figure(figsize=(12, 4))
        plt.plot(time, df[f], label="Original", color="blue", alpha=0.6)
        miss = df[f].isna()
        if miss.any():
            plt.scatter(time[miss], [np.nanmean(df[f])] * miss.sum(), color="red", marker="x", s=60, label="Missing")
        plt.scatter(time, df_cgan[f], color="orange", s=36, alpha=0.8, label="CGAN Imputed")  # bigger dots
        plt.title(f"{city} — {f} (CGAN Imputation)")
        plt.xlabel("Time"); plt.ylabel(f)
        plt.legend(); plt.tight_layout()
        plt.savefig(PLOT_DIR / f"{city}_{f}_cgan_imputation.png", dpi=150)
        plt.close()

print("\n✅ Imputed CSVs at:", ART_DIR)
print("✅ Imputation plots at:", PLOT_DIR)


=== Processing Bangalore ===
NaNs before: {'temp': 0, 'humidity': 0, 'windspeed': 0, 'winddir': 0, 'cloudcover': 0, 'dew': 0, 'precip': 0}
Epoch 20/200 D:1.3863 G:0.6926
Epoch 40/200 D:1.3863 G:0.6928
Epoch 60/200 D:1.3863 G:0.6933
Epoch 80/200 D:1.3863 G:0.6937
Epoch 100/200 D:1.3863 G:0.6937
Epoch 120/200 D:1.3863 G:0.6934
Epoch 140/200 D:1.3863 G:0.6930
Epoch 160/200 D:1.3863 G:0.6975
Epoch 180/200 D:1.3863 G:0.6931
Epoch 200/200 D:1.3863 G:0.6930
NaNs after (CGAN): {'temp': 0, 'humidity': 0, 'windspeed': 0, 'winddir': 0, 'cloudcover': 0, 'dew': 0, 'precip': 0}

=== Processing Delhi ===
NaNs before: {'temp': 0, 'humidity': 0, 'windspeed': 0, 'winddir': 0, 'cloudcover': 0, 'dew': 0, 'precip': 0}
Epoch 20/200 D:1.3857 G:0.6948
Epoch 40/200 D:1.3863 G:0.6929
Epoch 60/200 D:1.3863 G:0.6931
Epoch 80/200 D:1.3863 G:0.6931
Epoch 100/200 D:1.3863 G:0.6927
Epoch 120/200 D:1.3863 G:0.6932
Epoch 140/200 D:1.3863 G:0.6930
Epoch 160/200 D:1.3863 G:0.6930
Epoch 180/200 D:1.3863 G:0.6933
Epoch 20

## 5) Bi-LSTM forecaster + training & saving predictions

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim

class BiLSTMForecaster:
    def __init__(self, input_dim, hidden=64, layers=1, lr=1e-3):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = nn.LSTM(input_dim, hidden, num_layers=layers, batch_first=True, bidirectional=True).to(self.device)
        self.fc = nn.Linear(hidden*2, input_dim).to(self.device)
        self.crit = nn.MSELoss()
        self.opt = optim.Adam(list(self.model.parameters()) + list(self.fc.parameters()), lr=lr)

    def _make_seq(self, df, features, seq_len):
        X = df[features].values.astype(np.float32)
        seqs, targs = [], []
        for i in range(len(X) - seq_len):
            seqs.append(X[i:i+seq_len])
            targs.append(X[i+seq_len])
        return np.array(seqs), np.array(targs)

    def fit(self, df, features, seq_len=14, epochs=50, batch_size=64, verbose=False):
        X, y = self._make_seq(df, features, seq_len)
        X = torch.tensor(X, device=self.device)
        y = torch.tensor(y, device=self.device)
        for ep in range(1, epochs+1):
            self.model.train()
            perm = torch.randperm(len(X))
            losses = []
            for i in range(0, len(X), batch_size):
                idx = perm[i:i+batch_size]
                xb, yb = X[idx], y[idx]
                out, _ = self.model(xb)
                pred = self.fc(out[:, -1, :])
                loss = self.crit(pred, yb)
                self.opt.zero_grad(); loss.backward(); self.opt.step()
                losses.append(loss.item())
            if verbose and ep % 5 == 0:
                print(f"Epoch {ep}/{epochs} Loss: {np.mean(losses):.4f}")

    def forecast(self, hist_df, features, seq_len=14, steps=7):
        X = hist_df[features].values.astype(np.float32)
        seq = torch.tensor(X[-seq_len:], device=self.device).unsqueeze(0)
        preds = []
        self.model.eval()
        with torch.no_grad():
            for _ in range(steps):
                out, _ = self.model(seq)
                pred = self.fc(out[:, -1, :])
                preds.append(pred.cpu().numpy().ravel())
                seq = torch.cat([seq[:, 1:, :], pred.unsqueeze(1)], dim=1)
        return np.array(preds)

# Train & save per city (on CGAN-imputed data)
for city in DATASETS.keys():
    print(f"\n=== Bi-LSTM training for {city} ===")
    dfc = pd.read_csv(ART_DIR / f"{city}_imputed_cgan.csv")
    dfc[DATETIME_COL] = pd.to_datetime(dfc[DATETIME_COL], errors="coerce")
    dfc = dfc.sort_values(DATETIME_COL).reset_index(drop=True)

    feat = [f for f in FEATURES if f in dfc.columns]
    dfc[feat] = dfc[feat].replace([np.inf, -np.inf], np.nan).fillna(dfc[feat].mean())

    train, val, test = train_val_test_split(dfc, val_ratio=VAL_RATIO, test_ratio=TEST_RATIO, sort_col=DATETIME_COL)

    bl = BiLSTMForecaster(input_dim=len(feat), hidden=64, layers=1, lr=1e-3)
    bl.fit(train, feat, seq_len=14, epochs=50, batch_size=64, verbose=True)

    steps = len(test)
    hist_df = pd.concat([train, val], ignore_index=True)
    preds = bl.forecast(hist_df, feat, seq_len=14, steps=steps)

    pred_df = pd.DataFrame(preds, columns=feat)
    pred_df.insert(0, DATETIME_COL, test[DATETIME_COL].values)
    pred_df.to_csv(ART_DIR / f"{city}_bilstm_predictions.csv", index=False)

    # quick metrics
    print("\nAccuracy metrics:")
    for f in feat:
        print(f"{f:12}  nRMSE: {nrmse(test[f].values, pred_df[f].values):.4f}   nMSE: {nmse(test[f].values, pred_df[f].values):.4f}")

    # plots
    for f in feat:
        plt.figure(figsize=(10,4))
        plt.plot(test[DATETIME_COL], test[f].values, label="Actual", color="blue")
        plt.plot(test[DATETIME_COL], pred_df[f].values, label="Bi-LSTM Forecast", color="orange")
        plt.title(f"{city} — {f} Forecast"); plt.legend(); plt.tight_layout()
        plt.savefig(FORECAST_PLOT_DIR / f"{city}_{f}_forecast.png", dpi=150)
        plt.close()

print("\n✅ Bi-LSTM predictions saved in:", ART_DIR)
print("✅ Forecast plots in:", FORECAST_PLOT_DIR)


=== Bi-LSTM training for Bangalore ===
Epoch 5/50 Loss: 6479.7547
Epoch 10/50 Loss: 5867.8168
Epoch 15/50 Loss: 5333.9563
Epoch 20/50 Loss: 4873.2355
Epoch 25/50 Loss: 4438.6222
Epoch 30/50 Loss: 4063.4376
Epoch 35/50 Loss: 3748.4285
Epoch 40/50 Loss: 3468.2416
Epoch 45/50 Loss: 3223.9847
Epoch 50/50 Loss: 3005.1152

Accuracy metrics:
temp          nRMSE: 3.8262   nMSE: 304.5817
humidity      nRMSE: 1.6555   nMSE: 66.0948
windspeed     nRMSE: 0.1205   nMSE: 1.1360
winddir       nRMSE: 3.6133   nMSE: 441.7329
cloudcover    nRMSE: 0.7596   nMSE: 7.7405
dew           nRMSE: 6.9573   nMSE: 875.8728
precip        nRMSE: 0.1712   nMSE: 1.0426

=== Bi-LSTM training for Delhi ===
Epoch 5/50 Loss: 8838.0828
Epoch 10/50 Loss: 8215.4205
Epoch 15/50 Loss: 7647.9555
Epoch 20/50 Loss: 7200.7556
Epoch 25/50 Loss: 6768.2432
Epoch 30/50 Loss: 6388.7791
Epoch 35/50 Loss: 6076.9098
Epoch 40/50 Loss: 5790.2607
Epoch 45/50 Loss: 5536.5592
Epoch 50/50 Loss: 5281.9447

Accuracy metrics:
temp          nRMSE:

## 6) Create the Streamlit dashboard (reads CGAN-imputed data and does live Bi-LSTM forecast to a user-selected end date)

In [6]:
dashboard_code = r"""
import streamlit as st
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from datetime import timedelta
from pathlib import Path
import matplotlib.pyplot as plt

ART_DIR = Path("imputed_outputs")
DATETIME_COL = "datetime"
FEATURES = ["temp","humidity","windspeed","winddir","cloudcover","dew","precip"]

class BiLSTMForecaster:
    def __init__(self, input_dim, hidden=64, layers=1, lr=1e-3):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = nn.LSTM(input_dim, hidden, num_layers=layers, batch_first=True, bidirectional=True).to(self.device)
        self.fc = nn.Linear(hidden*2, input_dim).to(self.device)
        self.crit = nn.MSELoss()
        self.opt = optim.Adam(list(self.model.parameters()) + list(self.fc.parameters()), lr=lr)

    def fit(self, df, features, seq_len=14, epochs=50, batch_size=64, verbose=False):
        X = df[features].values.astype(np.float32)
        seqs, targs = [], []
        for i in range(len(X)-seq_len):
            seqs.append(X[i:i+seq_len]); targs.append(X[i+seq_len])
        X_t = torch.tensor(np.array(seqs), device=self.device)
        y_t = torch.tensor(np.array(targs), device=self.device)
        for ep in range(1, epochs+1):
            self.model.train()
            perm = torch.randperm(len(X_t))
            losses = []
            for i in range(0, len(X_t), batch_size):
                idx = perm[i:i+batch_size]
                xb, yb = X_t[idx], y_t[idx]
                out, _ = self.model(xb)
                pred = self.fc(out[:, -1, :])
                loss = self.crit(pred, yb)
                self.opt.zero_grad(); loss.backward(); self.opt.step()
                losses.append(loss.item())
            if verbose and ep % 5 == 0:
                print(f"Epoch {ep}/{epochs} Loss: {np.mean(losses):.4f}")

    def forecast(self, hist_df, features, seq_len=14, steps=7):
        X = hist_df[features].values.astype(np.float32)
        seq = torch.tensor(X[-seq_len:], device=self.device).unsqueeze(0)
        preds = []
        self.model.eval()
        with torch.no_grad():
            for _ in range(steps):
                out, _ = self.model(seq)
                pred = self.fc(out[:, -1, :])
                preds.append(pred.cpu().numpy().ravel())
                seq = torch.cat([seq[:, 1:, :], pred.unsqueeze(1)], dim=1)
        return np.array(preds)

st.title("🌤 Live Weather Forecast — Bi-LSTM on CGAN-Imputed Data")

files = list(ART_DIR.glob("*_imputed_cgan.csv"))
cities = [f.stem.replace("_imputed_cgan","") for f in files]
if not cities:
    st.error(f"No CGAN-imputed datasets in {ART_DIR}. Run the notebook blocks first.")
    st.stop()

city = st.selectbox("City", cities)
df = pd.read_csv(ART_DIR / f"{city}_imputed_cgan.csv")
df[DATETIME_COL] = pd.to_datetime(df[DATETIME_COL], errors="coerce")
df = df.sort_values(DATETIME_COL).reset_index(drop=True)

# Clean
df[FEATURES] = df[FEATURES].replace([np.inf, -np.inf], np.nan).fillna(df[FEATURES].mean())

last_date = df[DATETIME_COL].max().date()
st.caption(f"Last historical date: {last_date}")
horizon = st.slider("Days to forecast beyond last date", min_value=1, max_value=30, value=7)

# Train & forecast live
seq_len = 14
bl = BiLSTMForecaster(input_dim=len(FEATURES), hidden=64, layers=1, lr=1e-3)
bl.fit(df, FEATURES, seq_len=seq_len, epochs=40, batch_size=64, verbose=False)

preds = bl.forecast(df, FEATURES, seq_len=seq_len, steps=horizon)
future_dates = pd.date_range(df[DATETIME_COL].max() + pd.Timedelta(days=1), periods=horizon, freq="D")
pred_df = pd.DataFrame(preds, columns=FEATURES)
pred_df.insert(0, DATETIME_COL, future_dates)

st.subheader(f"Forecast for {city}: next {horizon} day(s)")
st.dataframe(pred_df)

# Plots: full history + forecast
fig, axes = plt.subplots(len(FEATURES), 1, figsize=(10, len(FEATURES)*2.3), sharex=True)
for i, feat in enumerate(FEATURES):
    axes[i].plot(df[DATETIME_COL], df[feat], label="Historical", alpha=0.6)
    axes[i].plot(pred_df[DATETIME_COL], pred_df[feat], label="Forecast", marker="o")
    axes[i].set_ylabel(feat); axes[i].grid(True, alpha=0.3)
axes[0].legend()
plt.tight_layout()
st.pyplot(fig)
"""
with open("dashboard.py", "w") as f:
    f.write(dashboard_code)

print("✅ dashboard.py written")

✅ dashboard.py written


## 7) Launch Streamlit in Colab via ngrok

In [7]:
# 🚀 Combined Colab cell: Start Streamlit + Ngrok in one go

# 1️⃣ Install required packages
!pip install -q streamlit pyngrok

# 2️⃣ Import
import subprocess, threading
from pyngrok import ngrok

# 3️⃣ Set your NGROK_AUTH_TOKEN
NGROK_AUTH_TOKEN = "31EacTUezPiEa0iT3tRIeKlUzLY_3qfozLwY4pLMpgSEQBtDV"  # <-- REPLACE THIS
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# 4️⃣ Function to run Streamlit
def run_streamlit():
    # Replace 'dashboard.py' with your actual dashboard file name
    subprocess.run(["streamlit", "run", "dashboard.py", "--server.port=8501"])

# 5️⃣ Start Streamlit in a separate thread
threading.Thread(target=run_streamlit, daemon=True).start()

# 6️⃣ Start ngrok tunnel
ngrok.kill()  # ensure no old tunnels are running
public_url = ngrok.connect(8501)
print("🔗 Open this URL to view the dashboard:\n", public_url)


🔗 Open this URL to view the dashboard:
 NgrokTunnel: "https://f67fa65d0ce6.ngrok-free.app" -> "http://localhost:8501"
