In [1]:
import pandas as pd

# Load the fullGas.csv file into a pandas DataFrame
df = pd.read_csv('/content/fullGas.csv')

print("Original DataFrame Info:")
df.info()
print("\nFirst 5 rows of the original DataFrame:")
display(df.head())

Original DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40494 entries, 0 to 40493
Data columns (total 25 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Make                  40494 non-null  object 
 1   Model                 40273 non-null  object 
 2   Body                  40493 non-null  object 
 3   Mileage_km            40494 non-null  int64  
 4   Price                 40494 non-null  int64  
 5   Year                  38799 non-null  float64
 6   Country               37902 non-null  object 
 7   Condition             40494 non-null  object 
 8   Fuel_Type             40465 non-null  object 
 9   Fuel_Consumption_l    12773 non-null  float64
 10  Drivetrain            31860 non-null  object 
 11  Gearbox               40014 non-null  object 
 12  Gears                 25115 non-null  float64
 13  Power_hp              40494 non-null  int64  
 14  Engine_Size_cc        34564 non-null  float64

Unnamed: 0,Make,Model,Body,Mileage_km,Price,Year,Country,Condition,Fuel_Type,Fuel_Consumption_l,...,Cylinders,Seats,Doors,Color,Upholstery,Full_Service_History,Non_Smoker_Vehicle,Previous_Owners,Seller,Image_url
0,Abarth,595,Compact,98000,16900,2020.0,IT,Used,Gasoline,6.7,...,4.0,4.0,3.0,White,Full leather,False,False,,Dealer,https://prod.pictures.autoscout24.net/listing-...
1,Abarth,595,Sedan,91500,12500,2017.0,IT,Used,Gasoline,6.0,...,4.0,4.0,3.0,Grey,Full leather,False,True,4.0,Dealer,https://prod.pictures.autoscout24.net/listing-...
2,Abarth,595,Compact,40000,17990,2015.0,IT,Used,Gasoline,6.5,...,4.0,4.0,3.0,Bronze,Full leather,True,True,1.0,Dealer,https://prod.pictures.autoscout24.net/listing-...
3,Abarth,500,Compact,133000,9300,2008.0,IT,Used,Gasoline,6.5,...,4.0,4.0,3.0,Black,Cloth,True,True,2.0,Dealer,https://prod.pictures.autoscout24.net/listing-...
4,Abarth,595,Compact,61019,14990,2021.0,BE,Used,Gasoline,,...,4.0,4.0,3.0,Yellow,Part leather,True,True,1.0,Dealer,https://prod.pictures.autoscout24.net/listing-...


### Data Type Conversion and Feature Extraction

Assuming your dataset contains a 'Date' column (or similar, like 'Timestamp' or 'DateTime'), I will convert it to a datetime object and then extract several time-based features that can be useful for analysis or modeling. If your date column has a different name, please adjust `df['Date']` accordingly.

Extracted features will include: `year`, `month`, `day`, `dayofweek` (0=Monday, 6=Sunday), `dayofyear`, `weekofyear`, `quarter`, and `is_weekend`.

### Handling Missing Values

I will check for missing values across all columns and fill numerical columns with their mean. For other data types, you might consider different strategies like mode imputation or dropping rows/columns, depending on the context.

In [3]:
print("Missing values before handling:")
display(df.isnull().sum()[df.isnull().sum() > 0])

# Fill missing numerical values with the mean of their respective columns
for col in df.select_dtypes(include=['number']).columns:
    if df[col].isnull().any():
        df[col].fillna(df[col].mean(), inplace=True)

print("\nMissing values after handling (numerical columns filled with mean):")
display(df.isnull().sum()[df.isnull().sum() > 0])

Missing values before handling:


Unnamed: 0,0
Model,221
Body,1
Year,1695
Country,2592
Fuel_Type,29
Fuel_Consumption_l,27721
Drivetrain,8634
Gearbox,480
Gears,15379
Engine_Size_cc,5930



Missing values after handling (numerical columns filled with mean):


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mean(), inplace=True)


Unnamed: 0,0
Model,221
Body,1
Country,2592
Fuel_Type,29
Drivetrain,8634
Gearbox,480
Color,2575
Upholstery,7272
Seller,150


### Processed Data Overview



In [4]:
print("DataFrame Info: after processing.")
df.info()
print("\nFirst 5 rows of the processed DataFrame (showing new features):")
display(df.head())

Processed DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40494 entries, 0 to 40493
Data columns (total 25 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Make                  40494 non-null  object 
 1   Model                 40273 non-null  object 
 2   Body                  40493 non-null  object 
 3   Mileage_km            40494 non-null  int64  
 4   Price                 40494 non-null  int64  
 5   Year                  40494 non-null  float64
 6   Country               37902 non-null  object 
 7   Condition             40494 non-null  object 
 8   Fuel_Type             40465 non-null  object 
 9   Fuel_Consumption_l    40494 non-null  float64
 10  Drivetrain            31860 non-null  object 
 11  Gearbox               40014 non-null  object 
 12  Gears                 40494 non-null  float64
 13  Power_hp              40494 non-null  int64  
 14  Engine_Size_cc        40494 non-null  float6

Unnamed: 0,Make,Model,Body,Mileage_km,Price,Year,Country,Condition,Fuel_Type,Fuel_Consumption_l,...,Cylinders,Seats,Doors,Color,Upholstery,Full_Service_History,Non_Smoker_Vehicle,Previous_Owners,Seller,Image_url
0,Abarth,595,Compact,98000,16900,2020.0,IT,Used,Gasoline,6.7,...,4.0,4.0,3.0,White,Full leather,False,False,1.406313,Dealer,https://prod.pictures.autoscout24.net/listing-...
1,Abarth,595,Sedan,91500,12500,2017.0,IT,Used,Gasoline,6.0,...,4.0,4.0,3.0,Grey,Full leather,False,True,4.0,Dealer,https://prod.pictures.autoscout24.net/listing-...
2,Abarth,595,Compact,40000,17990,2015.0,IT,Used,Gasoline,6.5,...,4.0,4.0,3.0,Bronze,Full leather,True,True,1.0,Dealer,https://prod.pictures.autoscout24.net/listing-...
3,Abarth,500,Compact,133000,9300,2008.0,IT,Used,Gasoline,6.5,...,4.0,4.0,3.0,Black,Cloth,True,True,2.0,Dealer,https://prod.pictures.autoscout24.net/listing-...
4,Abarth,595,Compact,61019,14990,2021.0,BE,Used,Gasoline,6.727151,...,4.0,4.0,3.0,Yellow,Part leather,True,True,1.0,Dealer,https://prod.pictures.autoscout24.net/listing-...
