<a href="https://colab.research.google.com/github/Anurag07-crypto/chatbot/blob/main/Artificial_Neural_Network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [57]:
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import DataLoader, Dataset
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import numpy as np



In [3]:
df = pd.read_csv("/content/sample_data/california_housing_train.csv")
df

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
0,-114.31,34.19,15.0,5612.0,1283.0,1015.0,472.0,1.4936,66900.0
1,-114.47,34.40,19.0,7650.0,1901.0,1129.0,463.0,1.8200,80100.0
2,-114.56,33.69,17.0,720.0,174.0,333.0,117.0,1.6509,85700.0
3,-114.57,33.64,14.0,1501.0,337.0,515.0,226.0,3.1917,73400.0
4,-114.57,33.57,20.0,1454.0,326.0,624.0,262.0,1.9250,65500.0
...,...,...,...,...,...,...,...,...,...
16995,-124.26,40.58,52.0,2217.0,394.0,907.0,369.0,2.3571,111400.0
16996,-124.27,40.69,36.0,2349.0,528.0,1194.0,465.0,2.5179,79000.0
16997,-124.30,41.84,17.0,2677.0,531.0,1244.0,456.0,3.0313,103600.0
16998,-124.30,41.80,19.0,2672.0,552.0,1298.0,478.0,1.9797,85800.0


In [5]:
X = df.drop(columns=["median_house_value"])
y = df["median_house_value"]

In [13]:
sc = StandardScaler()


In [58]:
X = sc.fit_transform(X)
y_scaler = StandardScaler()
y = y_scaler.fit_transform(y.values.reshape(-1, 1)).flatten()

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train

array([[ 0.74415376, -0.84931657, -0.52352266, ..., -0.10853098,
        -0.3464724 ,  2.44071114],
       [ 0.96359337, -0.99436094, -1.31802049, ...,  0.06658371,
        -0.02398365,  1.73875647],
       [ 1.23290563, -1.42949407, -0.92077158, ..., -0.93444503,
        -1.07727353,  0.66255523],
       ...,
       [ 0.70425565, -0.80252806,  0.74767387, ..., -0.58334443,
        -0.76258693, -0.48177659],
       [ 1.24288015, -1.43417292, -1.55636984, ...,  1.62344911,
         0.88106544, -0.411078  ],
       [-1.42031879,  0.99415067,  1.85997083, ...,  0.4377223 ,
         0.12425715, -0.78259933]])

In [22]:
# Convert numpy arrays and pandas Series into torch tensors
X_train_tensor = torch.from_numpy(X_train).float()
X_test_tensor = torch.from_numpy(X_test).float()
y_train_tensor = torch.from_numpy(y_train.values).float().unsqueeze(1)
y_test_tensor = torch.from_numpy(y_test.values).float().unsqueeze(1)

In [46]:
class CustomDataset(Dataset):
  def __init__(self, features, labels):
    self.features = features
    self.labels = labels
  def __len__(self):
    return len(self.features)
  def __getitem__(self, index):
    return self.features[index], self.labels[index]

In [47]:
train_dataset = CustomDataset(X_train_tensor, y_train_tensor)
test_dataset = CustomDataset(X_test_tensor, y_test_tensor)

In [48]:
# Loader
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

In [59]:
class MainModel(nn.Module):
  def __init__(self, input_features):
    super().__init__()
    self.input_features = input_features
    self.ann = nn.Sequential(
        nn.Linear(self.input_features, 128),
        nn.ReLU(),
        nn.Dropout(0.3),
        nn.Linear(128, 64),
        nn.ReLU(),
        nn.Linear(64, 1),
    )

  def forward(self, features):
    output = self.ann(features)
    return output

In [64]:
model = MainModel(X_train.shape[1])

In [65]:
learning_rate = 0.001
epochs = 75


In [66]:
optimizer = Adam(model.parameters(), lr = learning_rate)

In [67]:
loss_fn = nn.MSELoss()

In [68]:
print("Training Start")
print("="*60)
for epoch in range(epochs):
  model.train()
  epoch_loss = 0
  batch_count = 0
  for batch_features, batch_labels in train_loader:

    y_pred = model(batch_features)
    loss = loss_fn(y_pred, batch_labels.view(-1,1))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    epoch_loss += loss.item()
    batch_count += 1

    avg_loss = epoch_loss / batch_count

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{epochs}] - Avg Loss: {avg_loss:.4f}")

print("=" * 60)


Training Start
Epoch [10/75] - Avg Loss: 6169338880.0000
Epoch [10/75] - Avg Loss: 5379148800.0000
Epoch [10/75] - Avg Loss: 7991824725.3333
Epoch [10/75] - Avg Loss: 7040447232.0000
Epoch [10/75] - Avg Loss: 6762095308.8000
Epoch [10/75] - Avg Loss: 6322343296.0000
Epoch [10/75] - Avg Loss: 6695840658.2857
Epoch [10/75] - Avg Loss: 7102675232.0000
Epoch [10/75] - Avg Loss: 6943628544.0000
Epoch [10/75] - Avg Loss: 7245361177.6000
Epoch [10/75] - Avg Loss: 7175444270.5455
Epoch [10/75] - Avg Loss: 7395053802.6667
Epoch [10/75] - Avg Loss: 7368328487.3846
Epoch [10/75] - Avg Loss: 7221211154.2857
Epoch [10/75] - Avg Loss: 7288333670.4000
Epoch [10/75] - Avg Loss: 7571469072.0000
Epoch [10/75] - Avg Loss: 7664386183.5294
Epoch [10/75] - Avg Loss: 7595314702.2222
Epoch [10/75] - Avg Loss: 7569931250.5263
Epoch [10/75] - Avg Loss: 7503362214.4000
Epoch [10/75] - Avg Loss: 8067271789.7143
Epoch [10/75] - Avg Loss: 7889333760.0000
Epoch [10/75] - Avg Loss: 7967845153.3913
Epoch [10/75] - Avg

In [69]:
model.eval()

MainModel(
  (ann): Sequential(
    (0): Linear(in_features=8, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=128, out_features=64, bias=True)
    (4): ReLU()
    (5): Linear(in_features=64, out_features=1, bias=True)
  )
)

In [70]:
def evaluate_model(model, test_loader, y_scaler):
    model.eval()
    all_predictions = []
    all_labels = []

    with torch.no_grad():
        for batch_features, batch_labels in test_loader:
            outputs = model(batch_features)
            all_predictions.append(outputs)
            all_labels.append(batch_labels)

    # Concatenate all batches
    predictions = torch.cat(all_predictions).numpy()
    labels = torch.cat(all_labels).numpy()

    # Inverse transform to original scale
    predictions = y_scaler.inverse_transform(predictions)
    labels = y_scaler.inverse_transform(labels)

    # Calculate metrics
    mse = np.mean((predictions - labels) ** 2)
    rmse = np.sqrt(mse)
    mae = np.mean(np.abs(predictions - labels))

    # RÂ² Score
    ss_res = np.sum((labels - predictions) ** 2)
    ss_tot = np.sum((labels - np.mean(labels)) ** 2)
    r2_score = 1 - (ss_res / ss_tot)

    return rmse, mae, r2_score, predictions, labels

# Test the model
rmse, mae, r2, predictions, labels = evaluate_model(model, test_loader, y_scaler)

print("\n" + "=" * 60)
print("ðŸ“Š MODEL PERFORMANCE METRICS")
print("=" * 60)
print(f"Root Mean Squared Error (RMSE): ${rmse:,.2f}")
print(f"Mean Absolute Error (MAE):      ${mae:,.2f}")
print(f"RÂ² Score:                        {r2:.4f}")
print("=" * 60)

# Show sample predictions
print("\nðŸ“‹ Sample Predictions vs Actual:")
print("-" * 60)
for i in range(5):
    print(f"Predicted: ${predictions[i][0]:,.2f} | Actual: ${labels[i][0]:,.2f}")
print("=" * 60)


ðŸ“Š MODEL PERFORMANCE METRICS
Root Mean Squared Error (RMSE): $7,478,253,056.00
Mean Absolute Error (MAE):      $5,322,844,160.00
RÂ² Score:                        0.6983

ðŸ“‹ Sample Predictions vs Actual:
------------------------------------------------------------
Predicted: $14,480,753,664.00 | Actual: $8,292,802,560.00
Predicted: $36,017,057,792.00 | Actual: $40,593,334,272.00
Predicted: $8,209,246,720.00 | Actual: $6,808,253,952.00
Predicted: $31,119,351,808.00 | Actual: $30,630,617,088.00
Predicted: $17,218,547,712.00 | Actual: $16,968,132,608.00
