## Final
### Tanner Young

In [2]:
import pandas as pd

file_path = 'auto-mpg.csv'

auto_mpg_data = pd.read_csv(file_path)

auto_mpg_data.head()


Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino


### Regression Model
### Tanner Young

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

auto_mpg_data = pd.read_csv('auto-mpg.csv')

selected_features = ['displacement', 'weight'] 
X = auto_mpg_data[selected_features]
y = auto_mpg_data['mpg']  

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R-squared Score: {r2}")


Mean Squared Error: 14.991566354406837
R-squared Score: 0.7211724393728973


### Standard Metrics and Bias/Coefficients
### Tanner Young

In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

auto_mpg_data = pd.read_csv('auto-mpg.csv')  

selected_features = ['displacement', 'weight']  
X = auto_mpg_data[selected_features]
y = auto_mpg_data['mpg']  
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

intercept = model.intercept_
coefficients = model.coef_

print(f"Mean Squared Error: {mse}")
print(f"R-squared Score: {r2}")

print(f"Intercept (Bias): {intercept}")
for i, feature in enumerate(selected_features):
    print(f"Coefficient for {feature}: {coefficients[i]}")


Mean Squared Error: 14.991566354406837
R-squared Score: 0.7211724393728973
Intercept (Bias): 44.08885823461314
Coefficient for displacement: -0.018424112644130065
Coefficient for weight: -0.005707285234653733


### Pipeline 1
#### Tanner Young

In [10]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd

auto_mpg_data = pd.read_csv('auto-mpg.csv')

selected_features = ['displacement', 'weight'] 
X = auto_mpg_data[selected_features]
y = auto_mpg_data['mpg']  

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

pipeline_1 = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
    ('regression', LinearRegression())
])

pipeline_2 = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('poly_features', PolynomialFeatures(degree=3)),
    ('scaler', StandardScaler()),
    ('regression', LinearRegression())
])

pipeline_1.fit(X_train, y_train)
y_pred_pipe1 = pipeline_1.predict(X_test)
mse_pipe1 = mean_squared_error(y_test, y_pred_pipe1)
r2_pipe1 = r2_score(y_test, y_pred_pipe1)

pipeline_2.fit(X_train, y_train)
y_pred_pipe2 = pipeline_2.predict(X_test)
mse_pipe2 = mean_squared_error(y_test, y_pred_pipe2)
r2_pipe2 = r2_score(y_test, y_pred_pipe2)

pipeline_1.fit(X_train, y_train)
y_pred_pipe1 = pipeline_1.predict(X_test)
mse_pipe1 = mean_squared_error(y_test, y_pred_pipe1)
r2_pipe1 = r2_score(y_test, y_pred_pipe1)

intercept_pipe1 = pipeline_1.named_steps['regression'].intercept_
coefficients_pipe1 = pipeline_1.named_steps['regression'].coef_

pipeline_2.fit(X_train, y_train)
y_pred_pipe2 = pipeline_2.predict(X_test)
mse_pipe2 = mean_squared_error(y_test, y_pred_pipe2)
r2_pipe2 = r2_score(y_test, y_pred_pipe2)

intercept_pipe2 = pipeline_2.named_steps['regression'].intercept_
coefficients_pipe2 = pipeline_2.named_steps['regression'].coef_

print("Pipeline 1 Metrics:")
print(f"Mean Squared Error: {mse_pipe1}")
print(f"R-squared Score: {r2_pipe1}")
print(f"Intercept (Bias) - Pipeline 1: {intercept_pipe1}")
print(f"Coefficients - Pipeline 1:")
for i, feature in enumerate(selected_features):
    print(f"{feature}: {coefficients_pipe1[i]}")

print("\nPipeline 2 Metrics:")
print(f"Mean Squared Error: {mse_pipe2}")
print(f"R-squared Score: {r2_pipe2}")
print(f"Intercept (Bias) - Pipeline 2: {intercept_pipe2}")
print(f"Coefficients - Pipeline 2:")
for i, feature in enumerate(selected_features):
    print(f"{feature}: {coefficients_pipe2[i]}")

Pipeline 1 Metrics:
Mean Squared Error: 14.991566354406825
R-squared Score: 0.7211724393728975
Intercept (Bias) - Pipeline 1: 23.60817610062893
Coefficients - Pipeline 1:
displacement: -1.8943995104230102
weight: -4.790095747900251

Pipeline 2 Metrics:
Mean Squared Error: 12.572387598221146
R-squared Score: 0.7661666511424955
Intercept (Bias) - Pipeline 2: 23.60817610062894
Coefficients - Pipeline 2:
displacement: 0.0
weight: -6.642864828588152
