In [4]:
## 1. Introduction to the data ##

# All imports are grouped up front (standard library first, then third-party)
# so the cell's dependencies are visible in one place.
import warnings

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Blanket filter: suppresses every warning category from here on, including
# any raised by pandas or scikit-learn in the steps below.
warnings.filterwarnings('ignore')

# Load the dataset and record the distinct values of the label column.
cars = pd.read_csv("Auto.csv")
unique_regions = cars['origin'].unique()


## 2. Dummy variables ##

# One-hot encode the two categorical columns. The "cyl" / "year" prefixes are
# what the feature selection in section 4 matches on.
dummy_cylinders = pd.get_dummies(cars["cylinders"], prefix="cyl")
dummy_years = pd.get_dummies(cars["year"], prefix="year")

# Append both indicator blocks (cylinders first, then years) and discard the
# raw columns they replace.
cars = pd.concat([cars, dummy_cylinders, dummy_years], axis=1).drop(
    columns=["year", "cylinders"]
)

## 3. Multiclass classification ##

# Fixed seed so the shuffle, and therefore the split, is repeatable.
np.random.seed(42)
shuffled_rows = np.random.permutation(cars.index)
shuffled_cars = cars.reindex(shuffled_rows)

# The first TRAIN_ROWS shuffled rows are used for fitting; the rest are held out.
TRAIN_ROWS = 274
train, test = shuffled_cars.iloc[:TRAIN_ROWS], shuffled_cars.iloc[TRAIN_ROWS:]

## 4. Training a multiclass logistic regression model ##

# LogisticRegression is already imported in section 1, so it is not
# re-imported here.

# Distinct label values, sorted ascending in place.
unique_origins = cars["origin"].unique()
unique_origins.sort()

# Feature columns are the indicator columns created by get_dummies
# (prefixes "cyl" and "year").
features = [col for col in train.columns if col.startswith(("cyl", "year"))]

# The feature matrix is identical for every classifier, so build it once
# instead of on each loop iteration.
X_train = train[features]

# One-vs-rest: fit one binary classifier per label, each trained on the
# boolean target "row belongs to this label".
models = {}
for origin in unique_origins:
    y_train = train["origin"] == origin

    model = LogisticRegression()
    model.fit(X_train, y_train)
    # Shows the class order of the fitted model; section 5 reads column 1
    # of predict_proba, which corresponds to the second entry printed here.
    print(model.classes_)
    models[origin] = model

## 5. Testing the models ##

# Every classifier scores the same held-out rows.
X_test = test[features]

# One column per label, holding column 1 of that label's predict_proba output
# (the second class listed in the model's classes_). Rows are numbered
# positionally, not by the index of `test`.
testing_probs = pd.DataFrame(
    {origin: models[origin].predict_proba(X_test)[:, 1] for origin in unique_origins}
)

## 6. Choose the origin ##
# Pick, per row, the label whose classifier reported the highest probability.
# The lookup is restricted to the label columns so this step stays correct if
# it is re-run: once `predicted_class` exists, an unrestricted idxmax would
# include that column in the comparison.
testing_probs['predicted_class'] = testing_probs[list(unique_origins)].idxmax(axis=1)
testing_probs

[False  True]
[False  True]
[False  True]


Unnamed: 0,1,2,3,predicted_class
0,0.501463,0.159126,0.347161,1
1,0.289938,0.467950,0.244734,2
2,0.844593,0.088430,0.078781,1
3,0.226110,0.349057,0.434419,3
4,0.961397,0.037096,0.021617,1
5,0.844593,0.088430,0.078781,1
6,0.347880,0.203050,0.449724,3
7,0.799793,0.119026,0.074095,1
8,0.888490,0.088175,0.051411,1
9,0.347880,0.203050,0.449724,3
