In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate

# Load training data
train_df = pd.read_csv('train.csv')

# Single-feature setup: 'price' is predicted from 'year' alone.
X_train = train_df[['year']]
y_train = train_df['price']

# Candidate regressors to compare. random_state is pinned so the forest's
# cross-validation scores are reproducible from run to run.
models = [
    LinearRegression(),
    RandomForestRegressor(n_estimators=100, random_state=42),
]

# Evaluate every candidate with 5-fold cross-validation and remember the one
# with the lowest mean MSE, so the model used for the final predictions is
# actually chosen from the measured performance rather than hard-coded.
# NOTE(review): an integer cv on a regressor uses unshuffled KFold; if
# train.csv is ordered (e.g. by year) each fold tests extrapolation, which
# tree ensembles handle poorly -- confirm whether shuffling is wanted.
best_model = None
best_mse = float('inf')
for model in models:
    # One cross_validate pass scores both metrics on the same fitted folds,
    # instead of refitting every fold a second time just to obtain R².
    cv_results = cross_validate(
        model,
        X_train,
        y_train,
        cv=5,
        scoring=('neg_mean_squared_error', 'r2'),
    )

    # The scorer reports negated MSE (higher is better), so flip the sign.
    mse_cv = -cv_results['test_neg_mean_squared_error'].mean()
    # RMSE here is the root of the mean fold MSE, not the mean of fold RMSEs.
    rmse_cv = mse_cv ** 0.5
    r2_cv = cv_results['test_r2'].mean()

    # Print the results
    print(f'Model: {model.__class__.__name__}')
    print(f'Cross-validated Mean Squared Error (MSE): {mse_cv}')
    print(f'Cross-validated Root Mean Squared Error (RMSE): {rmse_cv}')
    print(f'Cross-validated R-squared (R²): {r2_cv}')
    print('----------------------------------------')

    # A NaN score (failed fit) never compares as smaller, so it is skipped.
    if mse_cv < best_mse:
        best_mse = mse_cv
        best_model = model

if best_model is None:
    raise RuntimeError('No model produced a finite cross-validated MSE.')

# Train the best model on the full training data. cross_validate fits clones,
# so the instance taken from `models` is still unfitted at this point.
best_model.fit(X_train, y_train)

# Load test data
test_df = pd.read_csv('test.csv')

# Extract the same feature column that the model was trained on
X_test = test_df[['year']]

# Predict prices for test data using the best model
test_df['price'] = best_model.predict(X_test)

# Save test data with predicted prices to a new CSV file
test_df.to_csv('testwithpredictions3.csv', index=False)


Model: LinearRegression
Cross-validated Mean Squared Error (MSE): 470833364529911.6
Cross-validated Root Mean Squared Error (RMSE): 21698694.996010974
Cross-validated R-squared (R²): 0.8235262389840721
----------------------------------------
Model: RandomForestRegressor
Cross-validated Mean Squared Error (MSE): 6146800015581016.0
Cross-validated Root Mean Squared Error (RMSE): 78401530.69667082
Cross-validated R-squared (R²): -1.3556639178906529
----------------------------------------
