In [9]:
import pandas as pd


# Module-level preview: load the sample CSV and print it as soon as the
# cell/module runs. NOTE(review): main() below reads the same file again, so
# the data is loaded and printed twice when this is run as a script.
df = pd.read_csv('E:/Python Basics/vehicles_sample.csv')
print(df)

# Clean the data by handling missing values and removing duplicates
def clean_data(df):
    """Fill missing Year/Price values and drop duplicate rows.

    Missing ``Year`` entries are replaced with the column median and missing
    ``Price`` entries with the column mean. Fully duplicated rows are then
    removed, keeping the first occurrence.

    Args:
        df: DataFrame with numeric ``Year`` and ``Price`` columns. It is
            modified in place.

    Returns:
        The same DataFrame object that was passed in, after cleaning.
    """
    # Assign the filled column back instead of calling
    # ``df[col].fillna(..., inplace=True)``: that chained form works on an
    # intermediate Series, emits a FutureWarning on pandas 2.x and leaves
    # ``df`` untouched once Copy-on-Write is enabled.
    df['Year'] = df['Year'].fillna(df['Year'].median())
    df['Price'] = df['Price'].fillna(df['Price'].mean())

    # Deduplicate after filling so rows that only differed by a missing
    # value collapse as well.
    df.drop_duplicates(inplace=True)

    return df

# Manipulate the data by selecting columns, filtering, aggregating, and adding a new column
def manipulate_data(df):
    """Derive several views of the vehicle data and add a discount column.

    Args:
        df: DataFrame with ``Make``, ``Model``, ``Year`` and ``Price``
            columns. A ``Price_After_Discount`` column (95% of ``Price``)
            is added to it in place.

    Returns:
        A 4-tuple ``(df_selected, df_filtered, df_grouped, df_sorted)``:
        the Make/Model/Year columns only, the rows with ``Price`` above
        34000, the mean ``Price`` per ``Make``, and the full frame
        (including the discount column) ordered by ``Year``.
    """
    wanted_columns = ['Make', 'Model', 'Year']
    subset = df[wanted_columns]

    # Taken before the discount column exists, so this view does not carry it.
    is_expensive = df['Price'] > 34000
    expensive = df[is_expensive]

    mean_price = df.groupby('Make')['Price'].mean()
    price_by_make = mean_price.reset_index()

    # Written onto the caller's frame; the sorted view below includes it.
    df['Price_After_Discount'] = df['Price'] * 0.95

    by_year = df.sort_values(by='Year')

    return subset, expensive, price_by_make, by_year

# Save the cleaned DataFrame to a new CSV file
def save_data(df, file_path):
    """Write ``df`` to ``file_path`` as CSV, leaving out the index column."""
    df.to_csv(path_or_buf=file_path, index=False)

# Main function
def main(input_path='E:/Python Basics/vehicles_sample.csv',
         output_path='E:/Python Basics/cleaned_vehicles.csv'):
    """Run the load -> clean -> manipulate -> save pipeline and print each stage.

    Args:
        input_path: CSV file to read the vehicle data from.
        output_path: CSV file the processed DataFrame is written to.

    The frame that gets saved is the cleaned one after ``manipulate_data``
    has run, so it also carries the ``Price_After_Discount`` column.
    """
    # The path is taken from one place; previously a ``file_path`` local was
    # assigned but the literal was repeated in the read call instead.
    df = pd.read_csv(input_path)
    print("Original DataFrame:")
    print(df)

    df = clean_data(df)
    print("\nDataFrame after cleaning:")
    print(df)

    df_selected, df_filtered, df_grouped, df_sorted = manipulate_data(df)

    print("\nSelected Columns (Make, Model, Year):")
    print(df_selected)

    print("\nFiltered Data (Price > 34000):")
    print(df_filtered)

    print("\nAverage Price by Make:")
    print(df_grouped)

    # manipulate_data added the discount column to ``df`` itself.
    print("\nDataFrame with Price After Discount:")
    print(df)

    print("\nDataFrame sorted by Year:")
    print(df_sorted)

    save_data(df, output_path)

# Run the pipeline only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()

          Make   Model    Year    Price
0       Nissan   Camry  2017.0  24000.0
1        Honda   Civic  2013.0  24000.0
2         Ford  Fusion  2014.0  34000.0
3    Chevrolet  Accord  2022.0  32000.0
4       Nissan  Accord  2015.0  27000.0
..         ...     ...     ...      ...
195      Honda  Fusion  2016.0  34000.0
196     Nissan  Fusion  2022.0  20000.0
197     Nissan  Fusion  2017.0  18000.0
198     Toyota  Sentra  2011.0  23000.0
199      Honda   Focus  2020.0  20000.0

[200 rows x 4 columns]
Original DataFrame:
          Make   Model    Year    Price
0       Nissan   Camry  2017.0  24000.0
1        Honda   Civic  2013.0  24000.0
2         Ford  Fusion  2014.0  34000.0
3    Chevrolet  Accord  2022.0  32000.0
4       Nissan  Accord  2015.0  27000.0
..         ...     ...     ...      ...
195      Honda  Fusion  2016.0  34000.0
196     Nissan  Fusion  2022.0  20000.0
197     Nissan  Fusion  2017.0  18000.0
198     Toyota  Sentra  2011.0  23000.0
199      Honda   Focus  2020.0  2000