In [1]:
# Import the library
import pandas as pd

# Start from an empty frame that only declares the column names
df = pd.DataFrame(columns=['company', 'model', 'year'])

# Add records: assigning to a label that does not exist yet through .loc
# enlarges the frame by one row
df.loc[0] = ['Tata', 'Nexon', 2017]
df.loc[1] = ['MG', 'Astor', 2021]
df.loc[2] = ['KIA', 'Seltos', 2019]
df.loc[3] = ['Hyundai', 'Creta', 2015]

# Row-wise enlargement of an empty frame does not guarantee a numeric dtype
# for 'year' (it may be left as object depending on the pandas version),
# so pin it explicitly to keep numeric operations working.
df = df.astype({'year': 'int64'})

# Display the dataframe
df

Unnamed: 0,company,model,year
0,Tata,Nexon,2017
1,MG,Astor,2021
2,KIA,Seltos,2019
3,Hyundai,Creta,2015


In [2]:
import numpy as np

# Pass a 2D numpy array - each row is the corresponding row in the dataframe.
# dtype=object keeps every element as its original Python object; without it
# NumPy promotes the mixed str/int rows to one string dtype and 2017 becomes '2017'.
data = np.array(
    [
        ['Tata', 'Nexon', 2017],
        ['MG', 'Astor', 2021],
        ['KIA', 'Seltos', 2019],
        ['Hyundai', 'Creta', 2015],
    ],
    dtype=object,
)

In [3]:
# Pass column names in the columns parameter of the constructor.
# A 2-D NumPy array has a single dtype for all of its elements, so 'year'
# does not come through as an integer column on its own; cast it explicitly.
df = pd.DataFrame(data, columns=['company', 'model', 'year']).astype({'year': 'int64'})
df

Unnamed: 0,company,model,year
0,Tata,Nexon,2017
1,MG,Astor,2021
2,KIA,Seltos,2019
3,Hyundai,Creta,2015


In [4]:
import pandas as pd

# Row-oriented input: every inner list is one record (company, model, year)
data = [
    ['Tata', 'Nexon', 2017],
    ['MG', 'Astor', 2021],
    ['KIA', 'Seltos', 2019],
    ['Hyundai', 'Creta', 2015],
]

# Hand the records and their column labels to the constructor in one call
df = pd.DataFrame(
    data=data,
    columns=['company', 'model', 'year'],
)

print(df)


   company   model  year
0     Tata   Nexon  2017
1       MG   Astor  2021
2      KIA  Seltos  2019
3  Hyundai   Creta  2015


In [6]:
# Create DataFrame using a dictionary of lists
import pandas as pd

# Column-oriented input: each key is a column name, each value that column's data
data = dict(
    company=['Tata', 'MG', 'KIA', 'Hyundai'],
    model=['Nexon', 'Astor', 'Seltos', 'Creta'],
    year=[2017, 2021, 2019, 2015],
)

# Build the frame from the mapping and show it
df = pd.DataFrame.from_dict(data)
print(df)


   company   model  year
0     Tata   Nexon  2017
1       MG   Astor  2021
2      KIA  Seltos  2019
3  Hyundai   Creta  2015


In [7]:
# Create DataFrame using a list of dictionaries
import pandas as pd

# Record-oriented input: one dictionary per row, keyed by column name
data = [
    dict(company='Tata', model='Nexon', year=2017),
    dict(company='MG', model='Astor', year=2021),
    dict(company='KIA', model='Seltos', year=2019),
    dict(company='Hyundai', model='Creta', year=2015),
]

# Column labels are taken from the dictionary keys
df = pd.DataFrame(data=data)
print(df)


   company   model  year
0     Tata   Nexon  2017
1       MG   Astor  2021
2      KIA  Seltos  2019
3  Hyundai   Creta  2015


In [8]:
# Create DataFrame using zip()

In [9]:
import pandas as pd

# One list per column
company = ['Tata', 'MG', 'KIA', 'Hyundai']
model = ['Nexon', 'Astor', 'Seltos', 'Creta']
year = [2017, 2021, 2019, 2015]

# zip() pairs up the i-th entries, giving one (company, model, year) tuple per row
data = list(zip(company, model, year))

# Build the frame from the row tuples
df = pd.DataFrame(
    data=data,
    columns=['company', 'model', 'year'],
)

print(df)


   company   model  year
0     Tata   Nexon  2017
1       MG   Astor  2021
2      KIA  Seltos  2019
3  Hyundai   Creta  2015


In [10]:
# Understand the data (EDA)
import pandas as pd

# Small sample frame that the inspection cells below operate on
df = pd.DataFrame(
    data={
        'company': ['Tata', 'MG', 'KIA', 'Hyundai'],
        'model': ['Nexon', 'Astor', 'Seltos', 'Creta'],
        'year': [2017, 2021, 2019, 2015],
    }
)


In [11]:
# Display the current contents of df (a bare last expression is rendered by the notebook)
df

Unnamed: 0,company,model,year
0,Tata,Nexon,2017
1,MG,Astor,2021
2,KIA,Seltos,2019
3,Hyundai,Creta,2015


In [12]:
# Structural summary: index range, per-column non-null counts and dtypes, memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   company  4 non-null      object
 1   model    4 non-null      object
 2   year     4 non-null      int64 
dtypes: int64(1), object(2)
memory usage: 224.0+ bytes


In [13]:
# Summary statistics (count, mean, std, min, quartiles, max);
# here only the numeric 'year' column is summarised
df.describe()

Unnamed: 0,year
count,4.0
mean,2018.0
std,2.581989
min,2015.0
25%,2016.5
50%,2018.0
75%,2019.5
max,2021.0


In [16]:
# Dimensions of the frame as a (rows, columns) tuple
df.shape

(4, 3)

In [17]:
# Total number of cells (rows x columns)
df.size

12

In [18]:
# Label-based selection with .loc.
# A notebook cell only echoes its final expression, so the earlier selections
# are printed explicitly; otherwise their results would be silently discarded.
print(df.loc[0])            # Get row at index 0
print(df.loc[0:2])          # Get rows from index 0 to 2 (inclusive)
print(df.loc[:, 'model'])   # Get only 'model' column
df.loc[1, 'year']           # Access single cell (row=1, column='year')

2021

In [19]:
# sort_values returns a new, sorted frame and leaves df itself unchanged.
# Only the final expression of a cell is echoed, so the first two results
# are printed explicitly instead of being discarded.
print(df.sort_values(by='year'))             # Sort by year ascending
print(df.sort_values(by='company'))          # Sort by company alphabetically
df.sort_values(by='year', ascending=False)   # Sort by year descending


Unnamed: 0,company,model,year
1,MG,Astor,2021
2,KIA,Seltos,2019
0,Tata,Nexon,2017
3,Hyundai,Creta,2015


In [20]:
# Count how many times each distinct company appears
df['company'].value_counts()

Tata       1
MG         1
KIA        1
Hyundai    1
Name: company, dtype: int64

In [22]:
# Adding rows with missing/invalid values
# (dealing with these is the heart of data cleaning / preprocessing)
import pandas as pd

df = pd.DataFrame(
    data={
        'company': ['Tata', 'MG', 'KIA', 'Hyundai'],
        'model': ['Nexon', 'Astor', 'Seltos', 'Creta'],
        'year': [2017, 2021, 2019, 2015],
    }
)

# Rows with missing/invalid data, keyed by the index label they are stored under
rows_to_add = {
    4: ['Honda', 'Jazz', None],     # Missing year
    5: [None, None, None],          # Entire row missing
    6: ['Toyota', None, 2018],      # Missing model
    7: ['Tata', 'Nexon', 2017],     # Duplicate row
}
for label, record in rows_to_add.items():
    df.loc[label] = record

# Add a column that holds no values at all
df['newcolumn'] = None

print(df)

   company   model  year newcolumn
0     Tata   Nexon  2017      None
1       MG   Astor  2021      None
2      KIA  Seltos  2019      None
3  Hyundai   Creta  2015      None
4    Honda    Jazz  None      None
5     None    None  None      None
6   Toyota    None  2018      None
7     Tata   Nexon  2017      None


In [23]:
# Boolean mask of the frame: True wherever a value is missing
df.isnull()

Unnamed: 0,company,model,year,newcolumn
0,False,False,False,True
1,False,False,False,True
2,False,False,False,True
3,False,False,False,True
4,False,False,True,True
5,True,True,True,True
6,False,True,False,True
7,False,False,False,True


In [24]:
# Flag rows that repeat an earlier row (the first occurrence is not flagged)
df.duplicated()

0    False
1    False
2    False
3    False
4    False
5    False
6    False
7     True
dtype: bool

In [25]:
# Replace every missing value with a readable placeholder.
# The result is rebound to df rather than mutated with inplace=True, which
# keeps the step explicit and chainable.
# Note: the placeholder is a string, so it is also written into 'year' for
# rows that had no year.
df = df.fillna("No Value Available")

In [26]:
# Remove the helper column. `columns=` already selects the column axis, so no
# separate axis argument is needed; the result is rebound to df instead of
# mutating the frame in place.
df = df.drop(columns='newcolumn')

In [27]:
# Show the frame after the missing values were filled and 'newcolumn' was dropped
print(df)

              company               model                year
0                Tata               Nexon                2017
1                  MG               Astor                2021
2                 KIA              Seltos                2019
3             Hyundai               Creta                2015
4               Honda                Jazz  No Value Available
5  No Value Available  No Value Available  No Value Available
6              Toyota  No Value Available                2018
7                Tata               Nexon                2017
