In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
import numpy as np

# --- Step 1: Load and Understand the Data ---
# Only the read itself is guarded, so an unrelated failure while displaying the
# data below is never misreported as a missing file.
try:
    # Load the dataset from the CSV file (expected next to this notebook)
    car_dataset = pd.read_csv('car.csv')
except FileNotFoundError:
    print("\n--- ERROR ---")
    print("File 'car.csv' not found. Please make sure it is in the same directory as your script.")
    # Re-raise instead of calling exit(): inside a notebook exit() takes down the
    # whole kernel/runtime, whereas an exception just stops this cell and keeps
    # the traceback visible.
    raise

print("--- Dataset Loaded Successfully ---")
# Display the first 5 rows to see what the data looks like
print("First 5 rows of the dataset:")
print(car_dataset.head())

# Get a summary of the dataset (columns, data types, non-null values)
print("\n--- Dataset Info ---")
car_dataset.info()


# --- Step 2: Preprocess the Data ---
# One-hot encode the categorical columns. drop_first=True omits the first level
# of each category so the remaining indicator columns are not redundant.
car_dataset = pd.get_dummies(
    car_dataset,
    columns=['Fuel_Type', 'Seller_Type', 'Transmission'],
    drop_first=True,
)

# Replace the model year with the car's age relative to a fixed reference year,
# then discard the columns that are not used as features.
current_year = 2024
car_dataset = (
    car_dataset
    .assign(Car_Age=lambda frame: current_year - frame['Year'])
    .drop(columns=['Car_Name', 'Year'])
)

print("\n--- Dataset after Preprocessing (first 5 rows) ---")
print(car_dataset.head())


# --- Step 3: Split Data into Training and Testing Sets ---
# Selling_Price is the prediction target; every other remaining column is a feature.
X = car_dataset.drop(columns=['Selling_Price'])
Y = car_dataset['Selling_Price']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

print(f"\nTraining data shape: {X_train.shape}")
print(f"Testing data shape: {X_test.shape}")


# --- Step 4: Train the Machine Learning Model ---
print("\n--- Starting Model Training ---")
# A 100-tree random forest with a fixed seed; fit() returns the fitted
# estimator, so construction and training are chained into one expression.
model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train, Y_train)
print("--- Model Training Finished ---")


# --- Step 5: Evaluate the Model ---
print("\n--- Evaluating Model Performance ---")
# Score the held-out rows: R2 compares the predictions against the true prices.
test_data_predictions = model.predict(X_test)
r2_score = metrics.r2_score(
    y_true=Y_test,
    y_pred=test_data_predictions,
)
print(f"R-squared (R2) Score: {r2_score:.2f}")

# --- Bonus: Make a Prediction on New Data ---
# Describe the new car by feature NAME, not by position. The training matrix X
# is ordered [Present_Price, Kms_Driven, Owner, <dummy columns>, Car_Age]
# because get_dummies appends the encoded columns and Car_Age is added after
# them. A positional array with the age in the fourth slot would therefore feed
# the age into a dummy column and a dummy flag into Car_Age.
# NOTE(review): the dummy column names below assume the categories are
# Fuel_Type {CNG, Diesel, Petrol}, Seller_Type {Dealer, Individual} and
# Transmission {Automatic, Manual}; the check below fails loudly if they differ.
new_car_features = {
    'Present_Price': 5.59,
    'Kms_Driven': 27000,
    'Owner': 0,
    'Car_Age': 10,
    'Fuel_Type_Diesel': 0,
    'Fuel_Type_Petrol': 1,
    'Seller_Type_Individual': 0,
    'Transmission_Manual': 1,
}

# Refuse to predict if any supplied feature is unknown to the model; otherwise
# the reindex below would silently drop it.
unknown_features = sorted(set(new_car_features) - set(X.columns))
if unknown_features:
    raise KeyError(f"New-car features not present in the training data: {unknown_features}")

# Align to the exact training column order; any training column not supplied
# above is filled with 0.
new_car_data = pd.DataFrame([new_car_features]).reindex(columns=X.columns, fill_value=0)
prediction = model.predict(new_car_data)
print(f"\nPredicted selling price for the new car: {prediction[0]:.2f} Lakhs")

--- Dataset Loaded Successfully ---
First 5 rows of the dataset:
  Car_Name  Year  Selling_Price  Present_Price  Kms_Driven Fuel_Type  \
0     ritz  2014           3.35           5.59       27000    Petrol   
1      sx4  2013           4.75           9.54       43000    Diesel   
2     ciaz  2017           7.25           9.85        6900    Petrol   
3  wagon r  2011           2.85           4.15        5200    Petrol   
4    swift  2014           4.60           6.87       42450    Diesel   

  Seller_Type Transmission  Owner  
0      Dealer       Manual      0  
1      Dealer       Manual      0  
2      Dealer       Manual      0  
3      Dealer       Manual      0  
4      Dealer       Manual      0  

--- Dataset Info ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 301 entries, 0 to 300
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Car_Name       301 non-null    object 
 1   Year           301 non-



In [2]:
import io

from google.colab import files
import pandas as pd

# Upload the file. files.upload() returns a dict mapping each uploaded file's
# name to its contents as bytes.
uploaded = files.upload()
if not uploaded:
    raise ValueError("No file was uploaded.")

# Read the upload from the returned bytes rather than from a hard-coded path:
# when a file of the same name already exists, Colab saves the new upload under
# a different name (e.g. 'car (1).csv'), so reading 'car.csv' could silently
# load an older copy.
uploaded_name = next(iter(uploaded))
df = pd.read_csv(io.BytesIO(uploaded[uploaded_name]))  # Read the CSV file
print(df.head())  # Show first 5 rows

Saving car.csv to car (1).csv
  Car_Name  Year  Selling_Price  Present_Price  Kms_Driven Fuel_Type  \
0     ritz  2014           3.35           5.59       27000    Petrol   
1      sx4  2013           4.75           9.54       43000    Diesel   
2     ciaz  2017           7.25           9.85        6900    Petrol   
3  wagon r  2011           2.85           4.15        5200    Petrol   
4    swift  2014           4.60           6.87       42450    Diesel   

  Seller_Type Transmission  Owner  
0      Dealer       Manual      0  
1      Dealer       Manual      0  
2      Dealer       Manual      0  
3      Dealer       Manual      0  
4      Dealer       Manual      0  
