# Pandas Introduction

Pandas is a powerful and flexible open-source data analysis and manipulation library for Python. It provides data structures like Series (1-dimensional) and DataFrame (2-dimensional) that make it easy to work with structured data. Pandas is widely used for data cleaning, exploration, transformation, and visualization in data science and machine learning workflows.

In [3]:
import pandas as pd


In [6]:
#series
#a pandas Series is a one-dimensional array-like object that can hold any data type.
# It is similar to a column in a spreadsheet or a single column in a DataFrame.


data =[1,2,3,4,5]
s = pd.Series(data)
print("Series:\n", s)
print("Data type of Series:", type(s))

Series:
 0    1
1    2
2    3
3    4
4    5
dtype: int64
Data type of Series: <class 'pandas.core.series.Series'>


In [8]:
data={'a':1,'b':2,'c':3}
series_dict= pd.Series(data)
print(series_dict)

a    1
b    2
c    3
dtype: int64


In [11]:
data = [10,20,30,40]
index=['a','b','c','d']
pd.Series(data,index=index)

a    10
b    20
c    30
d    40
dtype: int64

In [16]:
#dataframe
# create a dataframe from a dictionary of list

data={
  'name':['krish','john','jack'],
  'age':[23,45,32],
  'city':['Banglore','New york','Cicago']
}
df=pd.DataFrame(data)
print(df)

    name  age      city
0  krish   23  Banglore
1   john   45  New york
2   jack   32    Cicago


In [17]:
import numpy as np
np.array(df)

array([['krish', 23, 'Banglore'],
       ['john', 45, 'New york'],
       ['jack', 32, 'Cicago']], dtype=object)

In [67]:
#create a dataframe from list of dictionary

data=[
  {'Name':'krish','Age':20,'City':'Banglore'},
   {'Name':'John','Age':25,'City':'Bankok'},
   {'Name':'JAI','Age':32,'City':'Cicago'},
    {'Name':'Jessi','Age':38,'City':'Chennai'},
]
df = pd.DataFrame(data)
print(df)


    Name  Age      City
0  krish   20  Banglore
1   John   25    Bankok
2    JAI   32    Cicago
3  Jessi   38   Chennai


In [68]:
df=pd.read_csv('sales.csv')
df.head(5)

Unnamed: 0,Date,Region,Product,Quantity,Unit_Price,Total_Sales
0,2025-01-01,North,Widget A,10,25.5,255.0
1,2025-01-02,South,Widget B,5,40.0,200.0
2,2025-01-03,East,Widget A,7,25.5,178.5
3,2025-01-04,West,Widget C,3,60.0,180.0
4,2025-01-05,North,Widget B,8,40.0,320.0


In [69]:
df.tail(6)

Unnamed: 0,Date,Region,Product,Quantity,Unit_Price,Total_Sales
4,2025-01-05,North,Widget B,8,40.0,320.0
5,2025-01-06,South,Widget C,6,60.0,360.0
6,2025-01-07,East,Widget B,9,40.0,360.0
7,2025-01-08,West,Widget A,4,25.5,102.0
8,2025-01-09,North,Widget C,2,60.0,120.0
9,2025-01-10,South,Widget A,5,25.5,127.5


In [70]:

df

Unnamed: 0,Date,Region,Product,Quantity,Unit_Price,Total_Sales
0,2025-01-01,North,Widget A,10,25.5,255.0
1,2025-01-02,South,Widget B,5,40.0,200.0
2,2025-01-03,East,Widget A,7,25.5,178.5
3,2025-01-04,West,Widget C,3,60.0,180.0
4,2025-01-05,North,Widget B,8,40.0,320.0
5,2025-01-06,South,Widget C,6,60.0,360.0
6,2025-01-07,East,Widget B,9,40.0,360.0
7,2025-01-08,West,Widget A,4,25.5,102.0
8,2025-01-09,North,Widget C,2,60.0,120.0
9,2025-01-10,South,Widget A,5,25.5,127.5


In [71]:
## acessing data from dataframe
data

[{'Name': 'krish', 'Age': 20, 'City': 'Banglore'},
 {'Name': 'John', 'Age': 25, 'City': 'Bankok'},
 {'Name': 'JAI', 'Age': 32, 'City': 'Cicago'},
 {'Name': 'Jessi', 'Age': 38, 'City': 'Chennai'}]

In [72]:
DF = pd.DataFrame(data)

In [73]:
DF

Unnamed: 0,Name,Age,City
0,krish,20,Banglore
1,John,25,Bankok
2,JAI,32,Cicago
3,Jessi,38,Chennai


In [74]:
DF["Name"]
type(DF["Name"])

pandas.core.series.Series

In [75]:
DF.loc[0]

Name       krish
Age           20
City    Banglore
Name: 0, dtype: object

In [76]:
DF.loc[0][0]

  DF.loc[0][0]


'krish'

In [77]:
DF.iloc[0][2]

  DF.iloc[0][2]


'Banglore'

In [79]:
# accessing a specified element
print(DF.at[2,'Age'])

32


In [80]:
print(DF.at[2,'Name'])

JAI


In [81]:
DF.iat[2,2]

'Cicago'

In [82]:
DF

Unnamed: 0,Name,Age,City
0,krish,20,Banglore
1,John,25,Bankok
2,JAI,32,Cicago
3,Jessi,38,Chennai


In [83]:
## Data manipulation
DF

Unnamed: 0,Name,Age,City
0,krish,20,Banglore
1,John,25,Bankok
2,JAI,32,Cicago
3,Jessi,38,Chennai


In [84]:
DF['Salary']=[50000,60000,70000,80000]
DF

Unnamed: 0,Name,Age,City,Salary
0,krish,20,Banglore,50000
1,John,25,Bankok,60000
2,JAI,32,Cicago,70000
3,Jessi,38,Chennai,80000


In [85]:
#remove a column temporarly
DF.drop('Salary',axis=1)

Unnamed: 0,Name,Age,City
0,krish,20,Banglore
1,John,25,Bankok
2,JAI,32,Cicago
3,Jessi,38,Chennai


In [86]:
DF

Unnamed: 0,Name,Age,City,Salary
0,krish,20,Banglore,50000
1,John,25,Bankok,60000
2,JAI,32,Cicago,70000
3,Jessi,38,Chennai,80000


In [87]:
#remove a column permanentaly
DF.drop('Salary',axis=1,inplace=True)

In [88]:
DF

Unnamed: 0,Name,Age,City
0,krish,20,Banglore
1,John,25,Bankok
2,JAI,32,Cicago
3,Jessi,38,Chennai


In [90]:
# Add age to the column

DF['Age'] = DF['Age'] + 1

In [91]:
DF

Unnamed: 0,Name,Age,City
0,krish,21,Banglore
1,John,26,Bankok
2,JAI,33,Cicago
3,Jessi,39,Chennai


In [93]:
DF.drop(0,inplace=True)

In [94]:
DF

Unnamed: 0,Name,Age,City
1,John,26,Bankok
2,JAI,33,Cicago
3,Jessi,39,Chennai


In [99]:
##Display the data types of each column
print("Data types:\n",df.dtypes)

#Describe the DataFrame
print("Statistical summary:\n",df.describe())



Data types:
 Date            object
Region          object
Product         object
Quantity         int64
Unit_Price     float64
Total_Sales    float64
dtype: object
Statistical summary:
         Quantity  Unit_Price  Total_Sales
count  10.000000   10.000000     10.00000
mean    5.900000   40.200000    220.30000
std     2.601282   15.057667     98.10799
min     2.000000   25.500000    102.00000
25%     4.250000   25.500000    140.25000
50%     5.500000   40.000000    190.00000
75%     7.750000   55.000000    303.75000
max    10.000000   60.000000    360.00000


In [100]:
df.describe()

Unnamed: 0,Quantity,Unit_Price,Total_Sales
count,10.0,10.0,10.0
mean,5.9,40.2,220.3
std,2.601282,15.057667,98.10799
min,2.0,25.5,102.0
25%,4.25,25.5,140.25
50%,5.5,40.0,190.0
75%,7.75,55.0,303.75
max,10.0,60.0,360.0
