In [2]:
!pip uninstall torch -y
!pip cache purge  # Optional: to clear pip cache

!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

Found existing installation: torch 2.8.0+cu126
Uninstalling torch-2.8.0+cu126:
  Successfully uninstalled torch-2.8.0+cu126
[0mFiles removed: 0
Looking in indexes: https://download.pytorch.org/whl/cu121
Collecting torch
  Downloading https://download.pytorch.org/whl/cu121/torch-2.5.1%2Bcu121-cp312-cp312-linux_x86_64.whl (780.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m780.4/780.4 MB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Downloading https://download.pytorch.org/whl/cu121/nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m85.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Downloading https://download.pytorch.org/whl/cu121/nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# ==============================
# 1. Upload dataset in Colab
# ==============================
from google.colab import files
uploaded = files.upload()   # Choose weatherHistory.csv

# ==============================
# 2. Load dataset
# ==============================
df = pd.read_csv("weatherHistory.csv")

# Keep only numerical features
numerical_features = [
    "Temperature (C)", "Apparent Temperature (C)", "Humidity",
    "Wind Speed (km/h)", "Wind Bearing (degrees)", "Visibility (km)",
    "Pressure (millibars)"
]

df = df[numerical_features].dropna()  # Drop missing values
print("Dataset shape:", df.shape)
print(df.head())

# ==============================
# 3. Normalize numerical features
# ==============================
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(df[numerical_features])

data_tensor = torch.tensor(data_scaled, dtype=torch.float32)
dataset = TensorDataset(data_tensor)

batch_size = 64
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# ==============================
# 4. Define GAN models
# ==============================
class Generator(nn.Module):
    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(True),
            nn.Linear(128, 256),
            nn.ReLU(True),
            nn.Linear(256, output_dim),
            nn.Sigmoid()
        )

    def forward(self, z):
        return self.model(z)

class Discriminator(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.LeakyReLU(0.2),
            nn.Linear(256, 128),
            nn.LeakyReLU(0.2),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.model(x)

# ==============================
# 5. Setup
# ==============================
latent_dim = 100
data_dim = len(numerical_features)
num_epochs = 1000

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

G = Generator(latent_dim, data_dim).to(device)
D = Discriminator(data_dim).to(device)

criterion = nn.BCELoss()
optimizer_G = optim.Adam(G.parameters(), lr=0.0002)
optimizer_D = optim.Adam(D.parameters(), lr=0.0002)

# ==============================
# 6. Training Loop
# ==============================
for epoch in range(num_epochs):
    for i, (real_samples,) in enumerate(dataloader):
        real_samples = real_samples.to(device)
        batch_size_curr = real_samples.size(0)

        # --- Train Discriminator ---
        optimizer_D.zero_grad()
        labels_real = torch.ones(batch_size_curr, 1).to(device)
        output_real = D(real_samples)
        loss_real = criterion(output_real, labels_real)

        noise = torch.randn(batch_size_curr, latent_dim).to(device)
        fake_samples = G(noise)
        labels_fake = torch.zeros(batch_size_curr, 1).to(device)
        output_fake = D(fake_samples.detach())
        loss_fake = criterion(output_fake, labels_fake)

        loss_D = (loss_real + loss_fake) / 2
        loss_D.backward()
        optimizer_D.step()

        # --- Train Generator ---
        optimizer_G.zero_grad()
        output_fake_for_G = D(fake_samples)
        loss_G = criterion(output_fake_for_G, labels_real)
        loss_G.backward()
        optimizer_G.step()

    if epoch % 10 == 0 or epoch == num_epochs - 1:
        print(f"[Epoch {epoch}/{num_epochs}] D_loss: {loss_D.item():.4f}, G_loss: {loss_G.item():.4f}")

# ==============================
# 7. Generate synthetic data
# ==============================
G.eval()
with torch.no_grad():
    noise = torch.randn(5000, latent_dim).to(device)
    synthetic_data = G(noise).cpu().numpy()

# Inverse transform to original scale
synthetic_data_original_scale = scaler.inverse_transform(synthetic_data)

synthetic_df = pd.DataFrame(synthetic_data_original_scale, columns=numerical_features)
print("\nSynthetic data (first 10 rows):")
print(synthetic_df.head(10))

# ==============================
# 8. Save synthetic data to CSV
# ==============================
synthetic_df.to_csv("synthetic_weather.csv", index=False)
from google.colab import files
files.download("synthetic_weather.csv")


Saving weatherHistory.csv to weatherHistory (1).csv
Dataset shape: (96453, 7)
   Temperature (C)  Apparent Temperature (C)  Humidity  Wind Speed (km/h)  \
0         9.472222                  7.388889      0.89            14.1197   
1         9.355556                  7.227778      0.86            14.2646   
2         9.377778                  9.377778      0.89             3.9284   
3         8.288889                  5.944444      0.83            14.1036   
4         8.755556                  6.977778      0.83            11.0446   

   Wind Bearing (degrees)  Visibility (km)  Pressure (millibars)  
0                   251.0          15.8263               1015.13  
1                   259.0          15.8263               1015.63  
2                   204.0          14.9569               1015.94  
3                   269.0          15.8263               1016.41  
4                   259.0          15.8263               1016.51  
Using device: cuda
[Epoch 0/1000] D_loss: 0.6119, G_loss:

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [1]:
import torch
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU")


True
Tesla T4


In [None]:
# Save synthetic data to a CSV file
synthetic_df.to_csv("synthetic_weather_data.csv", index=False)

# Download the file (for Colab)
from google.colab import files
files.download("synthetic_weather_data.csv")

In [None]:
from sklearn.metrics import mean_squared_error
import numpy as np

rmse_scores = {}
for col in numerical_features:
    mse = mean_squared_error(real_numerical[col], synthetic_numerical[col])
    rmse = np.sqrt(mse)
    rmse_scores[col] = rmse

# Display
print("🔍 RMSE per feature:")
for k, v in rmse_scores.items():
    print(f"{k}: {v:.4f}")

# Optional: overall mean RMSE
mean_rmse = np.mean(list(rmse_scores.values()))
print(f"\n⚡ Mean RMSE across all numerical features: {mean_rmse:.4f}")