<center><h1><b>Pandas Basics</b></h1></center>

## 1) Importing Pandas and NumPy

In [27]:
import pandas as pd
import numpy as np

## 2) Checking Pandas Version

In [28]:
# Show which pandas release is installed.
pd.__version__

'2.3.1'

## 3) Creating a Simple Pandas Series

In [29]:
# Build a Series from a plain list; with no index given, pandas labels the
# values 0..n-1.
a = pd.Series(data=[1, 3, 2, 4, 5])
print(a)

0    1
1    3
2    2
3    4
4    5
dtype: int64


## 4) Creating a Series with Custom Index

In [30]:
# Same values as before, now labelled 'a'..'e' and given a Series name.
a = pd.Series(
    data=[1, 3, 2, 4, 5],
    index=list('abcde'),
    name='Number',
)
print(a)

a    1
b    3
c    2
d    4
e    5
Name: Number, dtype: int64


## 5) Accessing Elements of a Series

In [31]:
# .iloc selects by position (0-based), so index 1 is the second value.
second_item = a.iloc[1]
print(second_item)

3


## 6) Creating a DataFrame from a NumPy Array

In [32]:
# Draw from a seeded Generator so this cell prints the same table on every
# Restart & Run All; the unseeded global RNG produced new values each run.
# integers(4, 6) samples from {4, 5}: the upper bound is exclusive.
a = np.random.default_rng(42).integers(4, 6, size=(3, 4))
ans = pd.DataFrame(a, columns=['a', 'b', 'c', 'd'])
print(ans)

   a  b  c  d
0  5  5  4  4
1  5  5  5  5
2  5  5  4  5


## 7) Creating a DataFrame with Custom Index and Columns

In [33]:
# Seeded Generator: the values are fixed across kernel restarts instead of
# changing on every run. integers(0, 5) samples from 0..4 (upper bound exclusive).
a = np.random.default_rng(42).integers(0, 5, size=(3, 3))
# Row labels come from `index`, column labels from `columns`.
ans = pd.DataFrame(a, index=['1st', '2nd', '3rd'], columns=['a', 'b', 'c'])
print(ans)

     a  b  c
1st  0  2  1
2nd  4  4  0
3rd  2  1  4


## 8) Creating a DataFrame from a List of Lists

In [34]:
# Row-oriented input: every inner list is one row, matched to `columns` by position.
a = [
    ['axlin', 21],
    ['sanj', 35],
    ['hri', 24],
]
ans = pd.DataFrame(data=a, columns=['Name', 'Age'])
print(ans)

    Name  Age
0  axlin   21
1   sanj   35
2    hri   24


## 9) Creating a DataFrame from a Dictionary

In [35]:
# Column-oriented input: each key is a column name, each list holds that column's values.
x = {
    'Employee': ['axl', 'san', 'hri'],
    'salary': [12000, 55000, 70000],
}
ans = pd.DataFrame(data=x)
print(ans)

  Employee  salary
0      axl   12000
1      san   55000
2      hri   70000


## 10) Creating a DataFrame from a List of Dictionaries

In [36]:
# Record-oriented input: each dict is one row and its keys name the columns.
a = [
    {'a': 1, 'b': 2, 'c': 3},
    {'a': 4, 'b': 5, 'c': 8},
]
ans = pd.DataFrame(data=a)
print(ans)

   a  b  c
0  1  2  3
1  4  5  8


## 11) Creating a DataFrame from a List of Dictionaries (with Missing Keys)

In [37]:
# The first record has no 'b' key, so that cell is filled with NaN and the
# 'b' column is stored as float.
a = [
    {'a': 10, 'c': 20},
    {'a': 40, 'b': 50, 'c': 60},
]
ans = pd.DataFrame(data=a)
print(ans)

    a   c     b
0  10  20   NaN
1  40  60  50.0


## 12) Creating a DataFrame by Zipping Two Lists (with Missing Values)

In [38]:
name = ['axl', 'san', 'hri', 'sana']
# The last age is unknown; np.nan marks it as missing.
age = [33, 44, 22, np.nan]
# Pair the two lists element-wise into (name, age) row tuples.
x = [(person, years) for person, years in zip(name, age)]
ans = pd.DataFrame(data=x, columns=['Name', 'Age'])
print(ans)

   Name   Age
0   axl  33.0
1   san  44.0
2   hri  22.0
3  sana   NaN


## 13) Creating a DataFrame from a Dictionary with Multiple Columns

In [39]:
# One dict entry per column; all three lists have the same length (five rows).
names = {
    'Name': ['Anu', 'Reena', 'Jaan', 'Hema', 'Rathi'],
    'Marks': [50, 60, 70, 80, 90],
    'Subject': ['Tamil', 'Botany', 'English', 'Physics', 'Chemistry'],
}
x_ = pd.DataFrame(data=names)
print(x_)

    Name  Marks    Subject
0    Anu     50      Tamil
1  Reena     60     Botany
2   Jaan     70    English
3   Hema     80    Physics
4  Rathi     90  Chemistry


## 14) Creating a DataFrame of Students and Their Marks

In [40]:
# Labels student1 .. student10.
students = [f'student{i}' for i in range(1, 11)]
# Seeded Generator so the marks are the same on every Restart & Run All
# (the unseeded global RNG changed them each run). integers(70, 101) covers
# 70..100 inclusive because the upper bound is exclusive.
marks = np.random.default_rng(42).integers(70, 101, size=(10, 3))
marks_ = pd.DataFrame(marks, columns=['Maths', 'Physics', 'English'])
# Place the names in front of the subject columns (position 0).
marks_.insert(0, 'Students Name', students)
print(marks_)

  Students Name  Maths  Physics  English
0      student1     75      100       81
1      student2    100       74       92
2      student3     83       96       86
3      student4     75       75       78
4      student5     77       71       90
5      student6     79       82       99
6      student7     93       99       74
7      student8     83       92       77
8      student9     98       74       76
9     student10     99       84       78


## 15) Creating a DataFrame with Random Floating-Point Numbers

In [41]:
# 100 x 5 draws from the standard normal distribution. A seeded Generator keeps
# the values (and every later cell that reads `a_`) reproducible across kernel
# restarts; the unseeded np.random.randn call gave new numbers on each run.
a = np.random.default_rng(42).standard_normal((100, 5))
a_ = pd.DataFrame(a, columns=['A', 'B', 'C', 'D', 'E'])
print(a_)

           A         B         C         D         E
0  -1.914711 -0.002019  1.387434  0.543966 -0.218864
1   1.068507 -0.295374 -0.263047  0.302570  0.563027
2  -0.319268  0.132368 -0.077698 -0.440674 -0.350669
3  -1.051755 -0.000681 -0.529901  0.425359 -0.214126
4   1.071843  0.081798  1.213550 -0.662926  0.730024
..       ...       ...       ...       ...       ...
95  0.261348  0.970844  0.318032  1.552001 -0.229287
96  0.955046 -0.278354  0.674409  1.545745 -0.967440
97  0.280959 -0.587499  0.911280 -0.395575 -1.184867
98  2.332370 -0.571715 -1.748772 -1.627472  0.537277
99  0.957177 -1.372543 -0.833756 -1.115050 -1.756376

[100 rows x 5 columns]


## 16) Viewing the First Five Rows of the DataFrame

In [42]:
# Preview the top of the frame; n=5 is head()'s default, spelled out here.
a_.head(n=5)

Unnamed: 0,A,B,C,D,E
0,-1.914711,-0.002019,1.387434,0.543966,-0.218864
1,1.068507,-0.295374,-0.263047,0.30257,0.563027
2,-0.319268,0.132368,-0.077698,-0.440674,-0.350669
3,-1.051755,-0.000681,-0.529901,0.425359,-0.214126
4,1.071843,0.081798,1.21355,-0.662926,0.730024


## 17) Viewing the First Ten Rows of the DataFrame

In [43]:
# Same preview, widened to the first ten rows.
a_.head(n=10)

Unnamed: 0,A,B,C,D,E
0,-1.914711,-0.002019,1.387434,0.543966,-0.218864
1,1.068507,-0.295374,-0.263047,0.30257,0.563027
2,-0.319268,0.132368,-0.077698,-0.440674,-0.350669
3,-1.051755,-0.000681,-0.529901,0.425359,-0.214126
4,1.071843,0.081798,1.21355,-0.662926,0.730024
5,-1.549474,-0.566583,0.532273,-0.091349,-0.499645
6,0.988605,0.22643,-0.44336,-1.682201,0.169488
7,1.028564,-0.809243,-2.020595,0.396693,-0.666677
8,-0.593141,0.567812,-0.364503,-1.786213,-0.654747
9,-0.344246,-0.115809,0.880579,-1.9038,0.697314


## 18) Viewing the Last Five Rows of the DataFrame

In [44]:
# Preview the bottom of the frame; n=5 is tail()'s default, spelled out here.
a_.tail(n=5)

Unnamed: 0,A,B,C,D,E
95,0.261348,0.970844,0.318032,1.552001,-0.229287
96,0.955046,-0.278354,0.674409,1.545745,-0.96744
97,0.280959,-0.587499,0.91128,-0.395575,-1.184867
98,2.33237,-0.571715,-1.748772,-1.627472,0.537277
99,0.957177,-1.372543,-0.833756,-1.11505,-1.756376


## 19) Viewing the Last Ten Rows of the DataFrame

In [45]:
# Same preview, widened to the last ten rows.
a_.tail(n=10)

Unnamed: 0,A,B,C,D,E
90,0.488718,-0.327955,0.642974,-0.8684,1.766787
91,1.108552,0.948486,0.787398,-0.716789,-0.885965
92,2.249024,-0.544442,-1.228596,-0.413989,0.52486
93,-0.05524,-0.54299,-1.471368,-1.318913,0.383169
94,0.236425,-0.364435,-0.004251,1.439305,0.980492
95,0.261348,0.970844,0.318032,1.552001,-0.229287
96,0.955046,-0.278354,0.674409,1.545745,-0.96744
97,0.280959,-0.587499,0.91128,-0.395575,-1.184867
98,2.33237,-0.571715,-1.748772,-1.627472,0.537277
99,0.957177,-1.372543,-0.833756,-1.11505,-1.756376


## 20) Getting Information About the DataFrame

In [46]:
# Summarise the frame: index range, per-column non-null counts, dtypes and memory usage.
a_.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   A       100 non-null    float64
 1   B       100 non-null    float64
 2   C       100 non-null    float64
 3   D       100 non-null    float64
 4   E       100 non-null    float64
dtypes: float64(5)
memory usage: 4.0 KB


## 21) Getting the Shape of the DataFrame

In [47]:
# (number of rows, number of columns)
a_.shape

(100, 5)

## 22) Getting Descriptive Statistics of the DataFrame

In [48]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
a_.describe()

Unnamed: 0,A,B,C,D,E
count,100.0,100.0,100.0,100.0,100.0
mean,-0.092019,0.002187,0.023218,-0.143945,-0.19902
std,1.146341,0.960349,0.960551,0.953628,1.038314
min,-2.314232,-3.111751,-2.154717,-1.91598,-2.710774
25%,-0.763541,-0.613899,-0.565997,-0.84212,-0.861494
50%,0.04014,0.015377,-0.031659,-0.266038,-0.224076
75%,0.621651,0.670366,0.72148,0.407687,0.496625
max,3.594472,2.750569,2.566023,2.5186,2.663914
