# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the UCI Auto MPG data, clean it, split it into train/test sets and
# standardize the features. Leaves df, X, y, X_train, X_test, y_train, y_test,
# scaler, X_train_scaled and X_test_scaled defined at module level.
url = "https://archive.ics.uci.edu/static/public/9/data.csv"

df = pd.read_csv(url)

print("Initial DataFrame:\n", df.head())

# Column names the rest of this script relies on.
expected_columns = [
    'mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
    'acceleration', 'model_year', 'origin', 'car_name',
]

# read_csv has already used the first row as the header. Prefer selecting by
# name: overwriting df.columns positionally would silently mislabel the data
# (including the target) whenever the file's column order differs from the
# list above.
if set(expected_columns).issubset(df.columns):
    df = df[expected_columns].copy()
elif len(df.columns) == len(expected_columns):
    # Header uses different names: fall back to positional labels.
    # NOTE(review): this assumes the file's column order matches
    # expected_columns -- confirm against the dataset description.
    df.columns = expected_columns
else:
    raise ValueError(
        f"Expected {len(expected_columns)} columns {expected_columns}, "
        f"got {len(df.columns)}: {list(df.columns)}"
    )

# Coerce horsepower to numeric; unparseable entries become NaN and are
# imputed after the train/test split (see below).
df['horsepower'] = pd.to_numeric(df['horsepower'], errors='coerce')

# Drop 'car_name' as it's not useful in our analysis
df = df.drop(columns='car_name')

# One-hot encode 'origin' column
df = pd.get_dummies(df, columns=['origin'], prefix='origin')

# Separate features and target
X = df.drop('mpg', axis=1)
y = df['mpg']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Impute missing horsepower with the mean of the TRAINING rows only, so that
# no statistic computed from the test rows leaks into the training features.
horsepower_fill = X_train['horsepower'].mean()
X_train = X_train.fillna({'horsepower': horsepower_fill})
X_test = X_test.fillna({'horsepower': horsepower_fill})

# Keep the full frames free of NaN as well, using the same train-derived value,
# so any later code that reads df or X behaves as before.
df = df.fillna({'horsepower': horsepower_fill})
X = X.fillna({'horsepower': horsepower_fill})

# Standardize the features (statistics are fitted on the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Print the preprocessed data
print("\nPreprocessed X_train sample:\n", X_train_scaled[:5])
print("Preprocessed X_test sample:\n", X_test_scaled[:5])
