
1. Basics of Pandas: DataFrame, Series, and Panel
2. How to read data from a CSV
3. Hands-on exploration with real-life examples
4. Industry use case

# **Series**

In [8]:
import pandas as pd
# Monthly sales figures and the month label for each one, in matching order.
data = [100, 200, 300, 400]
months = ['Jan', 'Feb', 'Mar', 'Apr']

# A Series is a one-dimensional labelled array: `index` supplies the labels.
sales = pd.Series(data=data, index=months)
print(sales)


Jan    100
Feb    200
Mar    300
Apr    400
dtype: int64


In [10]:
sales['Jan']  # look up a value by its index label: series[label]

100

# **DataFrame**

In [11]:
# Column name -> list of values for that column, ready to pass to pd.DataFrame.
d = {"Samosa": [100, 200], "kachori": [1000, 2000]}
d

{'Samosa': [100, 200], 'kachori': [1000, 2000]}

In [12]:
# Each dict key becomes a column; rows get the default integer index 0, 1, ...
pd.DataFrame(d)

Unnamed: 0,Samosa,kachori
0,100,1000
1,200,2000


In [14]:
# Parallel columns: position i in every list describes the same city.
cities = ['Mumbai', 'Delhi', 'Bangalore', 'Chennai']
population_millions = [20.4, 30.3, 12.3, 10.9]
avg_temperature_c = [27, 25, 22, 28]

# Map each column label to its values, then build the table from the mapping.
data = {
    'City': cities,
    'Population (millions)': population_millions,
    'Average Temperature (°C)': avg_temperature_c,
}
df = pd.DataFrame(data)
df


Unnamed: 0,City,Population (millions),Average Temperature (°C)
0,Mumbai,20.4,27
1,Delhi,30.3,25
2,Bangalore,12.3,22
3,Chennai,10.9,28


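# **Panel**
Note:
"Pandas Panel was deprecated in pandas 0.20 and removed entirely in pandas 0.25, so `pd.Panel` no longer exists in current releases. It was used for handling 3D data. For multi-dimensional data, it’s better to use a DataFrame with a MultiIndex, or libraries like NumPy or xarray."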

# **Reading CSV Files**

In [3]:
# Read the state-wise COVID figures from disk into a DataFrame.
df = pd.read_csv('india_covid_cases.csv')

# Preview only the first rows (head() defaults to 5) rather than the whole table.
preview = df.head()
print(preview)


         State  Total Cases  Recovered  Deaths
0  Maharashtra      2000000    1800000   50000
1       Kerala      1500000    1450000   40000
2    Karnataka      1200000    1150000   30000
3   Tamil Nadu      1100000    1080000   20000
4        Delhi      1400000    1350000   25000


# Initial Exploration of Data
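## Commands to Know:
1. `df.head()` → First 5 rows (pass `n` for a different count, e.g. `df.head(2)`)
2. `df.tail()` → Last 5 rows (pass `n` for a different count, e.g. `df.tail(2)`)
3. `df.shape` → Tuple of (number of rows, number of columns)
4. `df.info()` → Metadata: column names, non-null counts, dtypes, and memory usage
5. `df.describe()` → Statistical summary of the numeric columns
6. `df.columns` → Column names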

In [15]:
# Load the COVID CSV into `df` (rebinding the name the city example also used)
# and display the whole table.
df = pd.read_csv('india_covid_cases.csv')
df

Unnamed: 0,State,Total Cases,Recovered,Deaths
0,Maharashtra,2000000,1800000,50000
1,Kerala,1500000,1450000,40000
2,Karnataka,1200000,1150000,30000
3,Tamil Nadu,1100000,1080000,20000
4,Delhi,1400000,1350000,25000


In [17]:
df.head(2)  # first 2 rows (head() with no argument returns the first 5)

Unnamed: 0,State,Total Cases,Recovered,Deaths
0,Maharashtra,2000000,1800000,50000
1,Kerala,1500000,1450000,40000


In [18]:
df.tail(2)  # last 2 rows (tail() with no argument returns the last 5)

Unnamed: 0,State,Total Cases,Recovered,Deaths
3,Tamil Nadu,1100000,1080000,20000
4,Delhi,1400000,1350000,25000


In [19]:
df.shape  # tuple of (number of rows, number of columns)

(5, 4)

In [20]:
# Prints the index range, each column's non-null count and dtype, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   State        5 non-null      object
 1   Total Cases  5 non-null      int64 
 2   Recovered    5 non-null      int64 
 3   Deaths       5 non-null      int64 
dtypes: int64(3), object(1)
memory usage: 288.0+ bytes


In [21]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; the text column 'State' is left out.
df.describe()

Unnamed: 0,Total Cases,Recovered,Deaths
count,5.0,5.0,5.0
mean,1440000.0,1366000.0,33000.0
std,350713.6,284657.7,12041.594579
min,1100000.0,1080000.0,20000.0
25%,1200000.0,1150000.0,25000.0
50%,1400000.0,1350000.0,30000.0
75%,1500000.0,1450000.0,40000.0
max,2000000.0,1800000.0,50000.0


In [22]:
df.columns  # Index object holding the column labels

Index(['State', 'Total Cases', 'Recovered', 'Deaths'], dtype='object')

# **Creating a DataFrame from Arrays**

In [25]:
import numpy as np
# 3x2 integer array; each inner list becomes one row of the DataFrame.
arr = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
])

# Wrap the array with explicit row labels and column names.
df2 = pd.DataFrame(data=arr, index=['a', 'b', 'c'], columns=['Gaju', 'Kaju'])
df2


Unnamed: 0,Gaju,Kaju
a,1,2
b,3,4
c,5,6


# **Extracting Columns from a DataFrame**

In [26]:
# Show the COVID table whose columns are extracted in the following cells.
df

Unnamed: 0,State,Total Cases,Recovered,Deaths
0,Maharashtra,2000000,1800000,50000
1,Kerala,1500000,1450000,40000
2,Karnataka,1200000,1150000,30000
3,Tamil Nadu,1100000,1080000,20000
4,Delhi,1400000,1350000,25000


In [27]:
# Single brackets with one column label select that column as a Series.
s = df['Total Cases']
s


Unnamed: 0,Total Cases
0,2000000
1,1500000
2,1200000
3,1100000
4,1400000


In [28]:
type(s)  # a single selected column is a pandas Series

In [29]:
# Indexing with a list of labels (double brackets) returns a DataFrame
# containing just those columns.
ns = df[["State",'Total Cases']]
ns

Unnamed: 0,State,Total Cases
0,Maharashtra,2000000
1,Kerala,1500000
2,Karnataka,1200000
3,Tamil Nadu,1100000
4,Delhi,1400000


In [30]:
type(ns)  # selecting with a list of columns yields a pandas DataFrame

# **Industry Use Case: E-Commerce in India**

In [7]:
# Load the order records, one row per order.
orders = pd.read_csv('flipkart_orders.csv')

# Total revenue per city, listed from the highest-earning city down.
revenue_by_city = orders.groupby('City')['Revenue'].sum()
print(revenue_by_city.sort_values(ascending=False))


City
Mumbai       2100
Delhi         800
Bangalore     500
Chennai       300
Name: Revenue, dtype: int64
