* Pandas is a powerful and easy-to-use data analysis and manipulation library for Python. It is especially useful when working with structured data (like tables, Excel files, or SQL results).


| Feature                 | Description                                                     |
| ----------------------- | --------------------------------------------------------------- |
| **DataFrame & Series**  | Core data structures to store tabular and one-dimensional data. |
| **Read/Write Files**    | Load data from CSV, Excel, SQL, JSON, etc.                      |
| **Data Cleaning**       | Handle missing values, remove duplicates, filter rows.          |
| **Data Manipulation**   | Merge, join, group, sort, pivot, and reshape data.              |
| **Statistics**          | Built-in functions like mean, median, standard deviation.       |
| **Time Series Support** | Great tools for handling date and time data.                    |


In [2]:
import pandas as pd 

In [3]:
# Load the used-bikes dataset from the notebook's working directory.
df = pd.read_csv(
    'Used_Bikes.csv'
)

In [4]:
# Load the dataset through a path relative to the notebook. The previous
# absolute, user-specific Windows path only resolved on the author's machine
# and made the notebook fail under Restart & Run All anywhere else.
# NOTE(review): assumes the relative file is the same CSV the absolute path
# pointed at — confirm.
df = pd.read_csv('Used_Bikes.csv')

In [5]:
# Overview of the raw frame: column names, non-null counts, dtypes and memory use.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32648 entries, 0 to 32647
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   bike_name   32648 non-null  object 
 1   price       32648 non-null  float64
 2   city        32648 non-null  object 
 3   kms_driven  32648 non-null  float64
 4   owner       32648 non-null  object 
 5   age         32648 non-null  float64
 6   power       32648 non-null  float64
 7   brand       32648 non-null  object 
dtypes: float64(4), object(4)
memory usage: 2.0+ MB


In [6]:
df.duplicated()

0        False
1        False
2        False
3        False
4        False
         ...  
32643     True
32644     True
32645     True
32646     True
32647     True
Length: 32648, dtype: bool

In [7]:
# Count rows that are exact repeats of an earlier row (all columns equal).
df.duplicated().sum()

25324

In [8]:
# Remove exact duplicate rows, keeping the first occurrence of each.
# Rebinding instead of `inplace=True` keeps the step an explicit assignment;
# the original index labels are preserved (no reset), as before.
df = df.drop_duplicates()

In [9]:
# Sanity check: once duplicates have been dropped this count should be 0.
df.duplicated().sum()

0

In [10]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,price,kms_driven,age,power
count,7324.0,7324.0,7324.0,7324.0
mean,84883.9,23910.496587,6.656472,228.133397
std,120966.2,27317.594631,3.605299,158.324219
min,4400.0,1.0,1.0,100.0
25%,30000.0,10155.75,4.0,125.0
50%,55000.0,19000.0,6.0,160.0
75%,100000.0,30112.0,8.0,350.0
max,1900000.0,750000.0,63.0,1800.0


In [11]:
df 

Unnamed: 0,bike_name,price,city,kms_driven,owner,age,power,brand
0,TVS Star City Plus Dual Tone 110cc,35000.0,Ahmedabad,17654.0,First Owner,3.0,110.0,TVS
1,Royal Enfield Classic 350cc,119900.0,Delhi,11000.0,First Owner,4.0,350.0,Royal Enfield
2,Triumph Daytona 675R,600000.0,Delhi,110.0,First Owner,8.0,675.0,Triumph
3,TVS Apache RTR 180cc,65000.0,Bangalore,16329.0,First Owner,4.0,180.0,TVS
4,Yamaha FZ S V 2.0 150cc-Ltd. Edition,80000.0,Bangalore,10000.0,First Owner,3.0,150.0,Yamaha
...,...,...,...,...,...,...,...,...
9362,Hero Hunk Rear Disc 150cc,25000.0,Delhi,48587.0,First Owner,8.0,150.0,Hero
9369,Bajaj Avenger 220cc,35000.0,Bangalore,60000.0,First Owner,9.0,220.0,Bajaj
9370,Harley-Davidson Street 750 ABS,450000.0,Jodhpur,3430.0,First Owner,4.0,750.0,Harley-Davidson
9371,Bajaj Dominar 400 ABS,139000.0,Hyderabad,21300.0,First Owner,4.0,400.0,Bajaj


In [12]:
df.head(10)

Unnamed: 0,bike_name,price,city,kms_driven,owner,age,power,brand
0,TVS Star City Plus Dual Tone 110cc,35000.0,Ahmedabad,17654.0,First Owner,3.0,110.0,TVS
1,Royal Enfield Classic 350cc,119900.0,Delhi,11000.0,First Owner,4.0,350.0,Royal Enfield
2,Triumph Daytona 675R,600000.0,Delhi,110.0,First Owner,8.0,675.0,Triumph
3,TVS Apache RTR 180cc,65000.0,Bangalore,16329.0,First Owner,4.0,180.0,TVS
4,Yamaha FZ S V 2.0 150cc-Ltd. Edition,80000.0,Bangalore,10000.0,First Owner,3.0,150.0,Yamaha
5,Yamaha FZs 150cc,53499.0,Delhi,25000.0,First Owner,6.0,150.0,Yamaha
6,Honda CB Hornet 160R ABS DLX,85000.0,Delhi,8200.0,First Owner,3.0,160.0,Honda
7,Hero Splendor Plus Self Alloy 100cc,45000.0,Delhi,12645.0,First Owner,3.0,100.0,Hero
8,Royal Enfield Thunderbird X 350cc,145000.0,Bangalore,9190.0,First Owner,3.0,350.0,Royal Enfield
9,Royal Enfield Classic Desert Storm 500cc,88000.0,Delhi,19000.0,Second Owner,7.0,500.0,Royal Enfield


In [13]:
df.tail()

Unnamed: 0,bike_name,price,city,kms_driven,owner,age,power,brand
9362,Hero Hunk Rear Disc 150cc,25000.0,Delhi,48587.0,First Owner,8.0,150.0,Hero
9369,Bajaj Avenger 220cc,35000.0,Bangalore,60000.0,First Owner,9.0,220.0,Bajaj
9370,Harley-Davidson Street 750 ABS,450000.0,Jodhpur,3430.0,First Owner,4.0,750.0,Harley-Davidson
9371,Bajaj Dominar 400 ABS,139000.0,Hyderabad,21300.0,First Owner,4.0,400.0,Bajaj
9372,Bajaj Avenger Street 220,80000.0,Hyderabad,7127.0,First Owner,5.0,220.0,Bajaj


In [14]:
df.tail(10)


Unnamed: 0,bike_name,price,city,kms_driven,owner,age,power,brand
9348,KTM Duke 390cc,195000.0,Mumbai,4568.0,First Owner,4.0,390.0,KTM
9349,Bajaj Pulsar 180cc,55000.0,Pune,10000.0,First Owner,6.0,180.0,Bajaj
9350,Bajaj Pulsar NS200,46000.0,Bangalore,27687.0,Third Owner,9.0,200.0,Bajaj
9360,Bajaj Pulsar NS200,48000.0,Allahabad,41939.0,First Owner,8.0,200.0,Bajaj
9361,Bajaj Avenger 220cc,50000.0,Bangalore,29134.0,First Owner,7.0,220.0,Bajaj
9362,Hero Hunk Rear Disc 150cc,25000.0,Delhi,48587.0,First Owner,8.0,150.0,Hero
9369,Bajaj Avenger 220cc,35000.0,Bangalore,60000.0,First Owner,9.0,220.0,Bajaj
9370,Harley-Davidson Street 750 ABS,450000.0,Jodhpur,3430.0,First Owner,4.0,750.0,Harley-Davidson
9371,Bajaj Dominar 400 ABS,139000.0,Hyderabad,21300.0,First Owner,4.0,400.0,Bajaj
9372,Bajaj Avenger Street 220,80000.0,Hyderabad,7127.0,First Owner,5.0,220.0,Bajaj


In [15]:
df.columns

Index(['bike_name', 'price', 'city', 'kms_driven', 'owner', 'age', 'power',
       'brand'],
      dtype='object')

In [16]:
df.brand

0                   TVS
1         Royal Enfield
2               Triumph
3                   TVS
4                Yamaha
             ...       
9362               Hero
9369              Bajaj
9370    Harley-Davidson
9371              Bajaj
9372              Bajaj
Name: brand, Length: 7324, dtype: object

In [17]:
df.price

0        35000.0
1       119900.0
2       600000.0
3        65000.0
4        80000.0
          ...   
9362     25000.0
9369     35000.0
9370    450000.0
9371    139000.0
9372     80000.0
Name: price, Length: 7324, dtype: float64

In [18]:
df.brand

0                   TVS
1         Royal Enfield
2               Triumph
3                   TVS
4                Yamaha
             ...       
9362               Hero
9369              Bajaj
9370    Harley-Davidson
9371              Bajaj
9372              Bajaj
Name: brand, Length: 7324, dtype: object

In [19]:
df['brand']

0                   TVS
1         Royal Enfield
2               Triumph
3                   TVS
4                Yamaha
             ...       
9362               Hero
9369              Bajaj
9370    Harley-Davidson
9371              Bajaj
9372              Bajaj
Name: brand, Length: 7324, dtype: object

In [20]:
df['owner']

0       First Owner
1       First Owner
2       First Owner
3       First Owner
4       First Owner
           ...     
9362    First Owner
9369    First Owner
9370    First Owner
9371    First Owner
9372    First Owner
Name: owner, Length: 7324, dtype: object

In [21]:
df['owner'].value_counts()

owner
First Owner             6642
Second Owner             588
Third Owner               84
Fourth Owner Or More      10
Name: count, dtype: int64

In [22]:
df['brand'].value_counts()

brand
Bajaj              2081
Royal Enfield      1346
Hero               1142
Honda               676
Yamaha              651
TVS                 481
KTM                 375
Suzuki              203
Harley-Davidson      91
Kawasaki             61
Hyosung              53
Mahindra             50
Benelli              46
Triumph              21
Ducati               20
BMW                  10
Jawa                  7
Indian                3
MV                    3
Rajdoot               1
LML                   1
Yezdi                 1
Ideal                 1
Name: count, dtype: int64

In [23]:
df['brand'].unique()

array(['TVS', 'Royal Enfield', 'Triumph', 'Yamaha', 'Honda', 'Hero',
       'Bajaj', 'Suzuki', 'Benelli', 'KTM', 'Mahindra', 'Kawasaki',
       'Ducati', 'Hyosung', 'Harley-Davidson', 'Jawa', 'BMW', 'Indian',
       'Rajdoot', 'LML', 'Yezdi', 'MV', 'Ideal'], dtype=object)

In [24]:
df['brand'].nunique()

23

In [25]:
df.shape

(7324, 8)

In [26]:
df.dtypes

bike_name      object
price         float64
city           object
kms_driven    float64
owner          object
age           float64
power         float64
brand          object
dtype: object

In [27]:
# Exercise: select bikes matching all of the following
# brand = 'Royal Enfield'
# age <= 2 (years)
# kms_driven <= 50000


In [28]:
# Subset of the frame containing only Royal Enfield bikes.
royal_df = df.loc[df['brand'].eq('Royal Enfield')]

In [29]:
royal_df

Unnamed: 0,bike_name,price,city,kms_driven,owner,age,power,brand
1,Royal Enfield Classic 350cc,119900.0,Delhi,11000.0,First Owner,4.0,350.0,Royal Enfield
8,Royal Enfield Thunderbird X 350cc,145000.0,Bangalore,9190.0,First Owner,3.0,350.0,Royal Enfield
9,Royal Enfield Classic Desert Storm 500cc,88000.0,Delhi,19000.0,Second Owner,7.0,500.0,Royal Enfield
23,Royal Enfield Classic Chrome 500cc,121700.0,Kalyan,24520.0,First Owner,5.0,500.0,Royal Enfield
36,Royal Enfield Classic 350cc,98800.0,Kochi,39000.0,First Owner,5.0,350.0,Royal Enfield
...,...,...,...,...,...,...,...,...
9261,Royal Enfield Classic 500cc,146006.0,Guwahati,8575.0,First Owner,4.0,500.0,Royal Enfield
9319,Royal Enfield Classic 350cc,100000.0,Chennai,25000.0,First Owner,10.0,350.0,Royal Enfield
9337,Royal Enfield Himalayan 410cc,120000.0,Gurgaon,8492.0,First Owner,5.0,410.0,Royal Enfield
9338,Royal Enfield Himalayan 410cc,138000.0,Delhi,5000.0,First Owner,5.0,410.0,Royal Enfield


In [30]:
# Royal Enfield bikes that are at most 2 years old.
# Filter straight from `df` rather than from `royal_df`: that name is rebound
# to a narrower subset further down the notebook, so chaining off it would
# silently return different rows if this cell were re-run afterwards.
royal_2age_df = df[(df['brand'] == 'Royal Enfield') & (df['age'] <= 2)]

In [31]:
# Of the young Royal Enfield bikes, show those driven 50,000 km or less.
royal_2age_df.query('kms_driven <= 50000')

Unnamed: 0,bike_name,price,city,kms_driven,owner,age,power,brand
38,Royal Enfield Thunderbird X 500cc,190500.0,Samastipur,4550.0,First Owner,2.0,500.0,Royal Enfield
81,Royal Enfield Interceptor 650cc,260000.0,Navi Mumbai,3800.0,First Owner,2.0,650.0,Royal Enfield
139,Royal Enfield Himalayan 410cc Fi ABS,173300.0,Vadodara,14000.0,First Owner,2.0,410.0,Royal Enfield
194,Royal Enfield Electra 350cc,145000.0,Bangalore,4000.0,First Owner,2.0,350.0,Royal Enfield
222,Royal Enfield Himalayan 410cc Fi ABS,177982.0,Hamirpur(hp),7000.0,First Owner,2.0,410.0,Royal Enfield
...,...,...,...,...,...,...,...,...
7294,Royal Enfield Classic 350cc-Redditch Edition,133500.0,Mumbai,1608.0,First Owner,2.0,350.0,Royal Enfield
7694,Royal Enfield Classic Chrome 500cc ABS,215000.0,Delhi,417.0,First Owner,2.0,500.0,Royal Enfield
8139,Royal Enfield Thunderbird X 350cc ABS,169000.0,Bangalore,4411.0,First Owner,2.0,350.0,Royal Enfield
8192,Royal Enfield Thunderbird 350cc ABS,145000.0,Ghaziabad,12400.0,First Owner,2.0,350.0,Royal Enfield


In [32]:
# Exercise: select bikes matching all of the following
# brand = 'TVS'
# age <= 1 (years)
# price <= 40000
# owner = 'First Owner'

In [33]:
# First-owner Royal Enfield bikes with exactly 30,000 km driven and a price of
# 100,000 or less. Note: this rebinds `royal_df`, replacing the brand-only
# subset assigned earlier in the notebook.
royal_df = df.loc[
    df['brand'].eq('Royal Enfield')
    & df['kms_driven'].eq(30000)
    & df['owner'].eq('First Owner')
    & df['price'].le(100000)
]

In [34]:
royal_df

Unnamed: 0,bike_name,price,city,kms_driven,owner,age,power,brand
1311,Royal Enfield Classic 350cc,80000.0,Pune,30000.0,First Owner,5.0,350.0,Royal Enfield
3783,Royal Enfield Bullet Electra 350cc,65000.0,Delhi,30000.0,First Owner,11.0,350.0,Royal Enfield
5593,Royal Enfield Thunderbird 350cc,75000.0,Chennai,30000.0,First Owner,10.0,350.0,Royal Enfield
6485,Royal Enfield Thunderbird 350cc,70000.0,Delhi,30000.0,First Owner,11.0,350.0,Royal Enfield
6716,Royal Enfield Classic 350cc,100000.0,Chennai,30000.0,First Owner,5.0,350.0,Royal Enfield
6719,Royal Enfield Bullet Electra 350cc,75000.0,Delhi,30000.0,First Owner,10.0,350.0,Royal Enfield
7716,Royal Enfield Classic 350cc,100000.0,Delhi,30000.0,First Owner,7.0,350.0,Royal Enfield
8012,Royal Enfield Thunderbird 350cc,75000.0,Pune,30000.0,First Owner,9.0,350.0,Royal Enfield


In [35]:
# Same Royal Enfield filter as the boolean-mask version, expressed with
# DataFrame.query; the adjacent string pieces join into one expression.
df.query(
    'brand == "Royal Enfield" '
    'and kms_driven == 30000 '
    'and owner == "First Owner" '
    'and price <= 100000'
)

Unnamed: 0,bike_name,price,city,kms_driven,owner,age,power,brand
1311,Royal Enfield Classic 350cc,80000.0,Pune,30000.0,First Owner,5.0,350.0,Royal Enfield
3783,Royal Enfield Bullet Electra 350cc,65000.0,Delhi,30000.0,First Owner,11.0,350.0,Royal Enfield
5593,Royal Enfield Thunderbird 350cc,75000.0,Chennai,30000.0,First Owner,10.0,350.0,Royal Enfield
6485,Royal Enfield Thunderbird 350cc,70000.0,Delhi,30000.0,First Owner,11.0,350.0,Royal Enfield
6716,Royal Enfield Classic 350cc,100000.0,Chennai,30000.0,First Owner,5.0,350.0,Royal Enfield
6719,Royal Enfield Bullet Electra 350cc,75000.0,Delhi,30000.0,First Owner,10.0,350.0,Royal Enfield
7716,Royal Enfield Classic 350cc,100000.0,Delhi,30000.0,First Owner,7.0,350.0,Royal Enfield
8012,Royal Enfield Thunderbird 350cc,75000.0,Pune,30000.0,First Owner,9.0,350.0,Royal Enfield


In [36]:
# TVS bikes, at most 1 year old, from their first owner, priced at 40,000 or
# less. The price test was previously `== 40000`, which contradicted the
# exercise statement (price <= 40000) and matched no rows at all.
df.query('brand == "TVS" and age <= 1 and price <= 40000 and owner == "First Owner"')

Unnamed: 0,bike_name,price,city,kms_driven,owner,age,power,brand


In [38]:
# Rename `kms_driven` to `kilometer`. Rebinding (rather than `inplace=True`)
# keeps the transformation an explicit assignment. Cells above that still
# refer to `kms_driven` must be run before this one.
df = df.rename(columns={'kms_driven': 'kilometer'})

In [39]:
df 

Unnamed: 0,bike_name,price,city,kilometer,owner,age,power,brand
0,TVS Star City Plus Dual Tone 110cc,35000.0,Ahmedabad,17654.0,First Owner,3.0,110.0,TVS
1,Royal Enfield Classic 350cc,119900.0,Delhi,11000.0,First Owner,4.0,350.0,Royal Enfield
2,Triumph Daytona 675R,600000.0,Delhi,110.0,First Owner,8.0,675.0,Triumph
3,TVS Apache RTR 180cc,65000.0,Bangalore,16329.0,First Owner,4.0,180.0,TVS
4,Yamaha FZ S V 2.0 150cc-Ltd. Edition,80000.0,Bangalore,10000.0,First Owner,3.0,150.0,Yamaha
...,...,...,...,...,...,...,...,...
9362,Hero Hunk Rear Disc 150cc,25000.0,Delhi,48587.0,First Owner,8.0,150.0,Hero
9369,Bajaj Avenger 220cc,35000.0,Bangalore,60000.0,First Owner,9.0,220.0,Bajaj
9370,Harley-Davidson Street 750 ABS,450000.0,Jodhpur,3430.0,First Owner,4.0,750.0,Harley-Davidson
9371,Bajaj Dominar 400 ABS,139000.0,Hyderabad,21300.0,First Owner,4.0,400.0,Bajaj


In [42]:
# Drop the free-text columns that are not used from here on.
# `errors='ignore'` makes the cell safe to re-run: without it a second
# execution raises KeyError because the columns are already gone.
df = df.drop(columns=['bike_name', 'city'], errors='ignore')

In [44]:
# Persist the de-duplicated, trimmed frame; the index is not written out.
df.to_csv(
    'used_bikes_cleaned.csv',
    index=False,
)

In [45]:
# Next step: split the columns into two groups —
#   cat: categorical columns (presumably the object-dtype ones)
#   num: numeric columns

df 

Unnamed: 0,price,kilometer,owner,age,power,brand
0,35000.0,17654.0,First Owner,3.0,110.0,TVS
1,119900.0,11000.0,First Owner,4.0,350.0,Royal Enfield
2,600000.0,110.0,First Owner,8.0,675.0,Triumph
3,65000.0,16329.0,First Owner,4.0,180.0,TVS
4,80000.0,10000.0,First Owner,3.0,150.0,Yamaha
...,...,...,...,...,...,...
9362,25000.0,48587.0,First Owner,8.0,150.0,Hero
9369,35000.0,60000.0,First Owner,9.0,220.0,Bajaj
9370,450000.0,3430.0,First Owner,4.0,750.0,Harley-Davidson
9371,139000.0,21300.0,First Owner,4.0,400.0,Bajaj


In [46]:
# Categorical (object-dtype) columns only. Take an explicit copy so the
# column assignments made on `char` further down operate on an independent
# frame and cannot trigger chained-assignment warnings against `df`.
char = df.select_dtypes(include='object').copy()

In [47]:
char 

Unnamed: 0,owner,brand
0,First Owner,TVS
1,First Owner,Royal Enfield
2,First Owner,Triumph
3,First Owner,TVS
4,First Owner,Yamaha
...,...,...
9362,First Owner,Hero
9369,First Owner,Bajaj
9370,First Owner,Harley-Davidson
9371,First Owner,Bajaj


In [48]:
# Everything that is not object-dtype, i.e. the numeric columns of this frame.
num = df.select_dtypes(exclude=['object'])


In [49]:
num 

Unnamed: 0,price,kilometer,age,power
0,35000.0,17654.0,3.0,110.0
1,119900.0,11000.0,4.0,350.0
2,600000.0,110.0,8.0,675.0
3,65000.0,16329.0,4.0,180.0
4,80000.0,10000.0,3.0,150.0
...,...,...,...,...
9362,25000.0,48587.0,8.0,150.0
9369,35000.0,60000.0,9.0,220.0
9370,450000.0,3430.0,4.0,750.0
9371,139000.0,21300.0,4.0,400.0


In [51]:
char['owner'].unique( )

array(['First Owner', 'Second Owner', 'Third Owner',
       'Fourth Owner Or More'], dtype=object)

In [52]:
# Ordinal encoding for the ownership labels: the position in this ordered
# list (starting at 1) is the numeric code assigned to the label.
owner_dict = {
    label: rank
    for rank, label in enumerate(
        ['First Owner', 'Second Owner', 'Third Owner', 'Fourth Owner Or More'],
        start=1,
    )
}

In [53]:
# Replace the ownership labels with their ordinal codes.
# Map from the untouched string column in `df` (same index as `char`) rather
# than from `char['owner']` itself: mapping the column onto itself is not
# idempotent — a second run would look up the already-encoded integers in
# `owner_dict`, find nothing, and turn the whole column into NaN.
char['owner'] = df['owner'].map(owner_dict)

# Series.map yields NaN for labels missing from the dict; fail loudly instead.
assert char['owner'].notna().all(), "owner label missing from owner_dict"

In [55]:
char['owner'].unique( )

array([1, 2, 3, 4], dtype=int64)

In [56]:
char['brand'].unique( )

array(['TVS', 'Royal Enfield', 'Triumph', 'Yamaha', 'Honda', 'Hero',
       'Bajaj', 'Suzuki', 'Benelli', 'KTM', 'Mahindra', 'Kawasaki',
       'Ducati', 'Hyosung', 'Harley-Davidson', 'Jawa', 'BMW', 'Indian',
       'Rajdoot', 'LML', 'Yezdi', 'MV', 'Ideal'], dtype=object)

In [57]:
from sklearn.preprocessing import LabelEncoder

# Integer-encode the brand names. LabelEncoder numbers the sorted unique
# labels 0..n-1; scikit-learn documents it as a target encoder, so treat the
# resulting codes as arbitrary identifiers, not as an ordering of brands.
le = LabelEncoder()

# Fit on the original string column in `df` (same rows and order as `char`)
# instead of `char['brand']`: re-running the cell would otherwise fit on the
# already-encoded integers and lose the brand names stored in `le.classes_`.
char['brand'] = le.fit_transform(df['brand'])

In [63]:
char['brand'].unique( )

array([19, 17, 20, 21,  6,  5,  1, 18,  2, 11, 15, 12,  3,  7,  4, 10,  0,
        9, 16, 13, 22, 14,  8])

In [60]:
char  

Unnamed: 0,owner,brand
0,1,19
1,1,17
2,1,20
3,1,19
4,1,21
...,...,...
9362,1,5
9369,1,1
9370,1,4
9371,1,1


In [61]:
# Put the encoded categorical columns and the numeric columns back side by
# side; rows are aligned on the shared index.
encoded_parts = [char, num]
clean_data = pd.concat(encoded_parts, axis='columns')

In [64]:
# Write the fully numeric, model-ready table to disk without the index column.
clean_data.to_csv(
    'used_bikes_cleaned_main.csv',
    index=False,
)