<a href="https://colab.research.google.com/github/Diooonis2Syracuse/ML/blob/main/MPGRegress.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch

In [None]:
# Location of the auto-mpg CSV (this is a Colab runtime path).
DATA_PATH = '/content/auto-mpg.csv'

# Read the raw table into a DataFrame.
df = pd.read_csv(DATA_PATH)

In [None]:
# Show the first rows of the freshly loaded frame.
df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino


In [None]:
# Remove the free-text 'car name' column, which is not used as a feature.
# `df` is rebound to its own result, so without errors='ignore' a second run
# of this cell would raise KeyError because the column is already gone.
df = df.drop(columns=['car name'], errors='ignore')

In [None]:
# Show the first rows again to confirm 'car name' is no longer present.
df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin
0,18.0,8,307.0,130,3504,12.0,70,1
1,15.0,8,350.0,165,3693,11.5,70,1
2,18.0,8,318.0,150,3436,11.0,70,1
3,16.0,8,304.0,150,3433,12.0,70,1
4,17.0,8,302.0,140,3449,10.5,70,1


In [None]:
# Discard rows containing missing values, then renumber the index from 0
# (drop=True avoids keeping the old index as a column).
df = df.dropna().reset_index(drop=True)

In [None]:
import sklearn
import sklearn.model_selection

# 80/20 train/test split; the fixed random_state makes the split repeatable.
split_frames = sklearn.model_selection.train_test_split(
    df,
    train_size=0.8,
    random_state=1,
)
df_train, df_test = split_frames

In [None]:
# Show the dtype of every column in the training split.
df_train.dtypes

Unnamed: 0,0
mpg,float64
cylinders,int64
displacement,float64
horsepower,int64
weight,int64
acceleration,float64
model year,int64
origin,int64


In [None]:
# Summary statistics of the training split, one row per column
# (so 'mean' / 'std' can be looked up with .loc[column, stat]).
df_train_stats = df_train.describe().T

In [None]:
# Summary statistics of the test split, one row per column.
df_test_stats = df_test.describe().T

In [None]:
# Print both summary tables, separated by one blank line.
print(df_train_stats, df_test_stats, sep='\n\n')

              count         mean         std     min     25%     50%     75%  \
mpg           313.0    23.404153    7.666909     9.0    17.5    23.0    29.0   
cylinders     313.0     5.402556    1.701506     3.0     4.0     4.0     8.0   
displacement  313.0   189.512780  102.675646    68.0   104.0   140.0   260.0   
horsepower    313.0   102.929712   37.919046    46.0    75.0    92.0   120.0   
weight        313.0  2961.198083  848.602146  1613.0  2219.0  2755.0  3574.0   
acceleration  313.0    15.704473    2.725399     8.5    14.0    15.5    17.3   
model year    313.0    75.929712    3.675305    70.0    73.0    76.0    79.0   
origin        313.0     1.591054    0.807923     1.0     1.0     1.0     2.0   

                 max  
mpg             46.6  
cylinders        8.0  
displacement   455.0  
horsepower     230.0  
weight        5140.0  
acceleration    24.8  
model year      82.0  
origin           3.0  

              count         mean         std     min     25%     50%   

In [None]:
# Columns that are treated as continuous features and standardized below.
names_columns = [
    'cylinders',
    'displacement',
    'horsepower',
    'weight',
    'acceleration',
]

In [None]:
# Work on copies so the original splits stay untouched by the feature
# engineering that follows.
df_train_norm = df_train.copy()
df_test_norm = df_test.copy()

In [None]:
# Standardize the continuous feature columns: (x - mean) / std.
#
# * Both splits are scaled with the TRAINING mean/std. Scaling the test split
#   with its own statistics would put its features on a different scale from
#   the one the model is trained on.
# * The scaled values are always computed from the untouched df_train / df_test
#   rather than from df_*_norm itself, so re-running this cell gives the same
#   result instead of standardizing already-standardized data again.
for col in names_columns:
  if col not in df_train_stats.index:
    # names_columns may already contain derived columns (added by later cells)
    # for which no statistics were computed; leave those as they are.
    continue

  mean_tr = df_train_stats.loc[col, 'mean']
  std_tr = df_train_stats.loc[col, 'std']

  # Plain column assignment replaces the column, so integer columns become
  # float cleanly.
  df_train_norm[col] = (df_train[col] - mean_tr) / std_tr
  df_test_norm[col] = (df_test[col] - mean_tr) / std_tr

df_train_norm.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin
334,27.2,-6.305314,-1.863894,-2.787942,-3.493618,-8.6524,81,1
258,18.6,-6.0667,-1.863893,-2.78793,-3.493618,-8.598024,78,1
139,29.0,-6.305314,-1.863895,-2.787943,-3.493618,-8.637899,74,2
310,37.2,-6.305314,-1.863895,-2.787951,-3.493618,-8.639712,80,3
349,33.0,-6.305314,-1.863895,-2.787947,-3.493618,-8.679587,81,2


In [None]:
# Turn 'model year' into an ordinal bucket index.
# With right=True, torch.bucketize returns i such that
# boundaries[i-1] <= year < boundaries[i], i.e.
#   year < 73 -> 0, 73..75 -> 1, 76..78 -> 2, year >= 79 -> 3.
boundaries = torch.tensor([73, 76, 79])

year_tr = torch.tensor(df_train_norm['model year'].values)
df_train_norm['model year bucketed'] = torch.bucketize(year_tr, boundaries, right=True)

year_ts = torch.tensor(df_test_norm['model year'].values)
df_test_norm['model year bucketed'] = torch.bucketize(year_ts, boundaries, right=True)

# Register the new column as a model feature exactly once. An unconditional
# append would add a duplicate entry (and a duplicate input column) every time
# this cell is re-run.
if 'model year bucketed' not in names_columns:
  names_columns.append('model year bucketed')

In [None]:
from torch.nn.functional import one_hot

# Number of distinct 'origin' categories, taken from the training split.
total_origin = len(set(df_train_norm['origin']))


def _encode_features(frame):
  """Build the model input tensors for one data split.

  Args:
    frame: DataFrame holding the columns listed in `names_columns` plus an
      integer 'origin' column.

  Returns:
    A tuple (numeric, origin_encoded, features):
      numeric        - tensor of the `names_columns` values,
      origin_encoded - one-hot encoding of `origin % total_origin`,
      features       - both concatenated along dim 1, cast to float32.
  """
  # The modulo maps the origin codes into [0, total_origin). num_classes is
  # passed explicitly so every split gets the same one-hot width; without it
  # one_hot infers the width from the largest value present in that split.
  origin_encoded = one_hot(
      torch.from_numpy(frame['origin'].values) % total_origin,
      num_classes=total_origin)
  numeric = torch.tensor(frame[names_columns].values)
  features = torch.cat([numeric, origin_encoded], 1).float()
  return numeric, origin_encoded, features


x_train_num, origin_encoded_tr, x_train = _encode_features(df_train_norm)
x_test_num, origin_encoded_ts, x_test = _encode_features(df_test_norm)

In [None]:
# Regression targets (mpg) as float32 tensors, one per split.
y_train = torch.tensor(df_train_norm['mpg'].to_numpy(), dtype=torch.float32)
y_test = torch.tensor(df_test_norm['mpg'].to_numpy(), dtype=torch.float32)

In [None]:
from torch.utils.data import DataLoader, TensorDataset

# Pair every feature row with its target value.
train_ds = TensorDataset(x_train, y_train)

# Mini-batches of 8 samples, reshuffled every epoch; the seed is set before
# the loader is created so that runs are repeatable.
batch_size = 8
torch.manual_seed(1)
train_dl = DataLoader(dataset=train_ds, batch_size=batch_size, shuffle=True)

In [None]:
import torch.nn as nn

# Fully connected regressor: inputs -> 8 -> 4 -> 1, with ReLU after each
# hidden layer and a linear output unit.
hidden_units = [8, 4]
all_layers = []

input_size = x_train.shape[1]
for hidden_unit in hidden_units:
  all_layers.extend([nn.Linear(input_size, hidden_unit), nn.ReLU()])
  # The next layer consumes what this one produces.
  input_size = hidden_unit

# Output head: a single predicted value per sample.
all_layers.append(nn.Linear(hidden_units[-1], 1))

model = nn.Sequential(*all_layers)

In [None]:
# Mean-squared-error objective (averaged over the batch).
loss_fn = nn.MSELoss(reduction='mean')

# Plain SGD over all model parameters. The variable name keeps its existing
# spelling because the training cell refers to it as `optimazer`.
optimazer = torch.optim.SGD(params=model.parameters(), lr=0.001)

In [None]:
# Train the model with mini-batch SGD and report the running loss periodically.
torch.manual_seed(1)
num_epochs = 200
log_epochs = 20  # print the loss every `log_epochs` epochs

for epoch in range(num_epochs):
  # Sum of the per-batch mean losses seen in this epoch.
  loss_hist_train = 0

  for x_batch, y_batch in train_dl:
    # The model outputs shape (batch, 1); take column 0 to match y_batch.
    pred = model(x_batch)[:, 0]
    loss = loss_fn(pred, y_batch)
    loss.backward()
    optimazer.step()
    # Clear gradients so they do not accumulate into the next batch.
    optimazer.zero_grad()
    loss_hist_train += loss.item()

  if epoch % log_epochs == 0:
    # Average of the per-batch losses. A space now separates the label from
    # the value (the previous message printed e.g. "Потеря58.9706").
    print(f'Эпоха {epoch} Потеря {loss_hist_train/len(train_dl):.4f}')

Эпоха 0 Потеря58.9706
Эпоха 20 Потеря59.6394
Эпоха 40 Потеря60.5288
Эпоха 60 Потеря58.9279
Эпоха 80 Потеря58.8907
Эпоха 100 Потеря59.4492
Эпоха 120 Потеря59.4524
Эпоха 140 Потеря59.0957
Эпоха 160 Потеря57.9210
Эпоха 180 Потеря58.4137


In [None]:
# Evaluate on the held-out split; no gradients are needed for inference.
with torch.no_grad():
  pred = model(x_test.float())[:, 0]
  loss = loss_fn(pred, y_test)
  mae = nn.L1Loss()(pred, y_test)

# Report the test MSE (the training objective) and the test MAE.
print(f'MSE при тестировании: {loss.item():.4f}')
print(f'MAE при тестировании: {mae.item():.4f}')

MSE при тестировании: 69.3322
MAE при тестировании: 7.2998
