<a href="https://colab.research.google.com/github/PranavGovindu/practice/blob/main/fuels.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.base import BaseEstimator, TransformerMixin
from torch.utils.data import DataLoader, TensorDataset

# Column layout used when parsing the raw file; the file itself has no header row.
column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',
                'Acceleration', 'Model Year', 'Origin']
categorical_columns = ['Origin']
# Every feature that is neither the target nor categorical is treated as numeric.
numeric_columns = [name for name in column_names
                   if name != 'MPG' and name not in categorical_columns]

url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'

# "?" marks a missing value. comment='\t' makes the parser ignore the rest of
# each line after a tab (presumably a trailing free-text field -- confirm
# against the dataset description). Rows with any missing value are discarded
# and the index is renumbered from zero.
df = (
    pd.read_csv(url, names=column_names,
                na_values="?", comment='\t',
                sep=" ", skipinitialspace=True)
    .dropna()
    .reset_index(drop=True)
)

# Target is MPG; everything else is a feature.
y = df['MPG']
X = df.drop(columns='MPG')

# 80/20 train/test split with a fixed seed so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=1
)

class PyTorchRegressor(BaseEstimator, TransformerMixin):
    """Scikit-learn style wrapper around a small fully connected PyTorch regressor.

    ``fit`` builds an ``nn.Sequential`` made of one ``Linear`` + ``ReLU`` pair
    per entry of ``hidden_units`` followed by a single-output ``Linear`` layer,
    and trains it with mini-batch SGD on a mean-squared-error loss.

    NOTE(review): the class derives from ``TransformerMixin`` although it only
    exposes ``fit``/``predict``; ``RegressorMixin`` looks like the intended
    mixin -- confirm before changing the bases.

    Parameters
    ----------
    hidden_units : sequence of int, default=(8, 4)
        Width of each hidden layer, in order. An empty sequence yields a
        single linear layer.
    num_epochs : int, default=200
        Number of full passes over the training data.
    lr : float, default=0.001
        Learning rate handed to ``torch.optim.SGD``.
    batch_size : int, default=8
        Mini-batch size used by the training ``DataLoader``.

    Attributes
    ----------
    model : torch.nn.Sequential or None
        The trained network; ``None`` until ``fit`` has been called.
    """

    # The default is a tuple rather than a list so that it cannot be mutated
    # and shared between instances.
    def __init__(self, hidden_units=(8, 4), num_epochs=200, lr=0.001, batch_size=8):
        self.hidden_units = hidden_units
        self.num_epochs = num_epochs
        self.lr = lr
        self.batch_size = batch_size
        self.model = None

    @staticmethod
    def _to_float_tensor(data):
        """Return *data* as a float32 tensor.

        Going through ``np.asarray`` makes pandas objects convert by position.
        Passing a Series straight to ``torch.tensor`` makes torch probe
        ``data[0]``, which is a *label* lookup and fails when the index does
        not contain 0 (e.g. after a shuffled train/test split). Objects with a
        ``toarray`` method (scipy sparse matrices) are densified first.
        """
        if hasattr(data, "toarray"):
            data = data.toarray()
        return torch.tensor(np.asarray(data, dtype=np.float32))

    def fit(self, X, y):
        """Build the network for the width of *X* and train it on ``(X, y)``.

        Parameters
        ----------
        X : array-like with a second dimension (``shape[1]`` is the feature count)
            Training features.
        y : array-like
            Training targets; reshaped to a single column.

        Returns
        -------
        self
        """
        X_tensor = self._to_float_tensor(X)
        y_tensor = self._to_float_tensor(y).reshape(-1, 1)

        dataset = TensorDataset(X_tensor, y_tensor)
        train_loader = DataLoader(dataset, batch_size=self.batch_size, shuffle=True)

        input_size = X_tensor.shape[1]
        layers = []
        for hidden in self.hidden_units:
            layers.append(nn.Linear(input_size, hidden))
            layers.append(nn.ReLU())
            input_size = hidden
        # Use the running width (not hidden_units[-1]) so that an empty
        # hidden_units sequence produces a plain linear model instead of
        # raising IndexError.
        layers.append(nn.Linear(input_size, 1))

        self.model = nn.Sequential(*layers)
        loss_fn = nn.MSELoss()
        optimizer = torch.optim.SGD(self.model.parameters(), lr=self.lr)

        for _ in range(self.num_epochs):
            for x_batch, y_batch in train_loader:
                optimizer.zero_grad()
                y_pred = self.model(x_batch)
                loss = loss_fn(y_pred, y_batch)
                loss.backward()
                optimizer.step()
        return self

    def predict(self, X):
        """Return the network's predictions for *X* as a NumPy array.

        The array has one column (the output layer has a single unit).

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.model is None:
            raise RuntimeError(
                "PyTorchRegressor has not been fitted; call fit() before predict()."
            )
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            return self.model(self._to_float_tensor(X)).numpy()

    def print_model(self):
        """Print the underlying ``nn.Sequential`` (``None`` if not fitted)."""
        print(self.model)

# Standardise the numeric features and one-hot encode the categorical ones.
preprocessor = ColumnTransformer([
    ('num', StandardScaler(), numeric_columns),
    ('cat', OneHotEncoder(), categorical_columns),
])

# Preprocessing followed by the PyTorch network as the final estimator.
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    (
        'pytorch_model',
        PyTorchRegressor(hidden_units=[8, 4], num_epochs=200, lr=0.001, batch_size=8),
    ),
])

pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)

# Mean squared error on the held-out split; predictions are flattened to 1-D
# so they line up element-wise with the target values.
mse = np.mean(np.square(y_pred.ravel() - y_test.to_numpy()))
print(f'Test MSE: {mse:.4f}')

# Show the architecture of the fitted network.
pipeline.named_steps['pytorch_model'].print_model()


Test MSE: 9.1546
Sequential(
  (0): Linear(in_features=9, out_features=8, bias=True)
  (1): ReLU()
  (2): Linear(in_features=8, out_features=4, bias=True)
  (3): ReLU()
  (4): Linear(in_features=4, out_features=1, bias=True)
)
