# 📌 Automobile Companies Data Analysis Using NumPy and Pandas

#

### ✅ Section 1: Data Loading & Basic Exploration

1. Load the dataset and display the first 5 rows.

In [None]:
import pandas as pd
import numpy as np
# Load the automobile dataset into a DataFrame and preview its first 5 rows
# (head() defaults to 5).
# NOTE(review): the path below is absolute and machine-specific.
df = pd.read_csv(
    filepath_or_buffer=r"D:\PYTON PROGRAMMING\PYTHON FILES\PANDAS\PANDAS-MODULE-PRACTICE\Car Company Data Analysis\Automobile_data.csv"
)
df.head()

2. Print the number of rows and columns.

In [8]:
# df.shape is a (rows, columns) tuple; unpack it once and report both parts.
row_count, column_count = df.shape
print("Rows : ", row_count)
print("Columns : ", column_count)

Rows :  61
Columns :  10


3. Show all column names and their data types.

In [None]:
# df.dtypes is a Series indexed by column name, so printing it lists every
# column together with its data type.
column = df.dtypes
print(column)

4. Display the last 10 rows of the dataset.

In [None]:
# Show the final 10 rows of the DataFrame.
df.tail(10)

5. Get the basic summary statistics (describe) for numerical columns.

In [None]:
# describe() reports count, mean, std, min, quartiles and max for the numeric
# columns, which is the summary this step asks for. info() would only list
# dtypes and non-null counts, not statistics.
df.describe()

### ✅ Section 2: Data Cleaning & Handling Missing Values

6. Replace all '?' with np.nan.

In [36]:
# Turn the "?" placeholder into a real missing value (NaN) everywhere in the
# frame, modifying df in place.
df.replace(to_replace="?", value=np.nan, inplace=True)

7. Count total missing values in each column.

In [None]:
# isnull() flags each missing cell; summing the boolean flags per column gives
# the number of missing values in that column.
missing_values_count = df.isnull().sum()
print(missing_values_count)

8. Drop all rows with missing values and print the shape.

In [30]:
# Report the shape, drop every row that has at least one missing value
# (in place), then report the shape again.
shape_before = df.shape
print("Before droping the missing value rows, shape : ", shape_before, "\n")
df.dropna(axis=0, how="any", inplace=True)
print("After droping the missing value rows, shape : ", df.shape)

Before droping the missing value rows, shape :  (61, 10) 

After droping the missing value rows, shape :  (58, 10)


9. Convert the price, horsepower, and average-mileage columns to numeric.

In [None]:
# Convert the three measurement columns to numeric dtypes. errors='coerce'
# turns any value that cannot be parsed into NaN instead of raising.
for numeric_column in ('average-mileage', 'price', 'horsepower'):
    df[numeric_column] = pd.to_numeric(df[numeric_column], errors='coerce')
print(df)
print(df.dtypes)


10. Print company and price column only.

In [None]:
# Keep every row but only the 'company' and 'price' columns.
df.loc[:, ['company', 'price']]

### ✅ Section 3: Filtering & Conditional Selection

11. Select all rows where company is 'audi'

In [52]:
# Boolean-mask selection: keep only the rows whose company is exactly 'audi'.
audi_rows = df.loc[df['company'].eq('audi')]
audi_rows

Unnamed: 0,index,company,body-style,wheel-base,length,engine-type,num-of-cylinders,horsepower,average-mileage,price
3,3,audi,sedan,99.8,176.6,ohc,four,102,24,13950.0
4,4,audi,sedan,99.4,176.6,ohc,five,115,18,17450.0
5,5,audi,sedan,99.8,177.3,ohc,five,110,19,15250.0
6,6,audi,wagon,105.8,192.7,ohc,five,110,19,18920.0


12. Filter cars where engine-type is 'ohc'

In [56]:
# Select the cars whose engine type is 'ohc' and show how many there are.
ohc_engine = df.loc[df['engine-type'].eq('ohc')]
len(ohc_engine)

46

13. Find all cars with price between 10000 and 15000.

3. Print first 4 rows and the last 4 rows of the data file.

In [None]:
# A notebook cell renders only its final expression, so a bare df.head(4)
# followed by df.tail(4) shows the tail alone. Stack both slices into one
# frame so the first 4 and the last 4 rows are displayed together.
pd.concat([df.head(4), df.tail(4)])

11. Print all the rows which have at least one NaN value.

In [None]:
# Build a per-row flag that is True when any column in that row is missing,
# then print just the flagged rows.
rows_with_nan = df.isna().any(axis="columns")
print(df[rows_with_nan])

12. Remove all rows containing NaN values from the DataFrame.

In [2]:
# Drop, in place, every row that contains at least one missing value.
df.dropna(axis=0, how="any", inplace=True)

13. Clean the data by capitalizing the first letter.

In [None]:
# Normalise the text columns: trim surrounding whitespace and title-case the
# descriptive columns, and upper-case the engine-type codes.
cols = ['company', 'body-style', 'num-of-cylinders']
for text_column in cols:
    df[text_column] = df[text_column].str.strip().str.title()
df['engine-type'] = df['engine-type'].str.upper()
df.head()

5. Print all the cars from a particular company which have a price over 15000.


In [None]:
# Rows must satisfy both conditions: company is 'Alfa-Romero' and price is
# strictly greater than 15000.
df.loc[df['company'].eq('Alfa-Romero') & df['price'].gt(15000)]

6. Change the price of car at row 15 to 12000

In [4]:
# Set the 'price' cell of the row whose index label is 15 to 12000.
# NOTE(review): .loc is label-based, not positional. Rows were dropped earlier,
# so label 15 may not be the 16th row (and may not exist at all) — confirm
# this targets the intended car.
df.loc[15,'price'] = 12000

7. Sum the entire price column.

In [None]:
# Sum only the price column. Reducing the whole frame first would also "sum"
# every text column (string concatenation), which is wasted work and can raise
# a TypeError when a column mixes strings with numbers.
df['price'].sum()

7. Print average car price of each company.

In [None]:
# Group the cars by company and average the price within each group.
df.groupby('company')['price'].mean()

8. Print number of cars from each company.

In [None]:
# Count the non-missing 'index' entries per company (one per car), then list
# the companies from most to fewest cars.
cars_per_company = df.groupby('company')['index'].count()
cars_per_company.sort_values(ascending=False)

9. Print all the car companies that start with A.

In [None]:
# Company names were title-cased earlier in the notebook, so testing for a
# lowercase 'a' would match nothing. Compare on a lower-cased copy to make the
# check case-insensitive; na=False treats a missing name as "no match".
starts_with_a = df['company'].str.lower().str.startswith('a', na=False)
print(df[starts_with_a])


10. Arrange the data on the basis of price.

In [None]:
# Order the whole frame by price, cheapest first (ascending is the default).
df.sort_values(by='price', inplace=True)
# Sorting keeps the old row labels, so rebuild a clean 0..n-1 index and mirror
# it into the 'index' column.
df.reset_index(drop=True, inplace=True)
df['index'] = df.index

13. Replace a word with another.

In [16]:
# Assign the replaced column back instead of calling replace(inplace=True) on
# the selected column: that chained form is deprecated and, with pandas
# Copy-on-Write enabled, updates a temporary object and leaves df unchanged.
df['num-of-cylinders'] = df['num-of-cylinders'].replace({'Four': 'Eight'})

14. Increase the price of Eight cylinders engine by 3000.

In [None]:
# Raise the price of every 'Eight'-cylinder car by 3000, the amount the task
# statement asks for (the earlier code added 9563 instead). A boolean mask with
# .loc updates only the matching rows in one vectorized step, replacing the
# row-by-row apply.
eight_cylinder = df['num-of-cylinders'] == 'Eight'
df.loc[eight_cylinder, 'price'] += 3000
df

15. Increase the horsepower of the Eight-cylinder engines by an amount of 82.

In [None]:
# Add 82 horsepower to every 'Eight'-cylinder car. The masked .loc update is
# vectorized, so it avoids calling a Python lambda once per row.
eight_cylinder = df['num-of-cylinders'] == 'Eight'
df.loc[eight_cylinder, 'horsepower'] += 82

In [None]:
# Preview the first 10 rows to check the updates made above.
df.head(10)

16. Calculate the average price of eight-cylinder engine cars.

In [None]:
# Pick the price cells of the 'Eight'-cylinder rows in a single .loc lookup
# and average them.
df.loc[df['num-of-cylinders'] == 'Eight', 'price'].mean()

17. Calculate the average price for each number-of-cylinders type of car.

In [None]:
# Average the price within each cylinder-count group and list the groups from
# the most expensive to the cheapest.
mean_price_by_cylinders = df.groupby('num-of-cylinders')['price'].mean()
mean_price_by_cylinders.sort_values(ascending=False)

18. Print all the cars with rotor engine.

In [None]:
# Engine types were upper-cased earlier, so match the upper-case 'ROTOR' code.
df.loc[df['engine-type'].eq('ROTOR')]

19. Convert Rotor engine to Electric.

In [None]:
# Relabel rotor engines as electric. The result is assigned back to the column
# because replace(inplace=True) on a selected column is deprecated and, with
# pandas Copy-on-Write enabled, would leave df unchanged.
df['engine-type'] = df['engine-type'].replace({'ROTOR': 'ELECTRIC'})

20. Increase the power of that Electric engine by 20 units.

In [None]:
# Add 20 horsepower to every car whose engine type is 'ELECTRIC'. The masked
# .loc update is vectorized instead of applying a lambda row by row.
is_electric = df['engine-type'] == 'ELECTRIC'
df.loc[is_electric, 'horsepower'] += 20

21. Increase the price of the electric car by 6945.

In [23]:
# Raise the price of every electric car by 6945. The engine-type label must be
# spelled 'ELECTRIC' to match the data; the misspelling "ELCTRIC" matched no
# row, so no price was ever changed.
is_electric = df['engine-type'] == 'ELECTRIC'
df.loc[is_electric, 'price'] += 6945

22. Append "Electric" as a suffix to the brand name of each electric car.

In [24]:
# Append the suffix 'Electric' to the company name of every electric car
# (the suffix was previously misspelled 'Elcetric'). Only rows whose engine
# type is 'ELECTRIC' are touched; all other names stay as they are.
is_electric = df['engine-type'] == 'ELECTRIC'
df.loc[is_electric, 'company'] += 'Electric'

22. Print all the cars of Porsche.

In [None]:
# NOTE(review): .loc is label-based. If no row carries index label 60 at this
# point, this assignment appends a new, otherwise-empty row instead of editing
# an existing car — confirm the label is the intended one.
df.loc[60, 'price'] = 45000

# Select the Porsche rows once (ignoring case and surrounding whitespace) and
# print them, which is what this step asks for.
df_porsche = df[df['company'].str.strip().str.lower() == "porsche"]
print(df_porsche)

# Reuse the selection for the average price rather than filtering df again.
porsche_avg = df_porsche['price'].mean()

# Average horsepower per company, strongest first; shown as the cell's output.
power_avg = df.groupby('company')['horsepower'].mean().sort_values(ascending=False)
power_avg