# Pandas 

In [2]:
import pandas as pd

In [3]:
#check pandas version
print(pd.__version__)

1.3.4


# Series create, manipulate,querry, delete


In [6]:
#creating a series from a list
arr=[0,1,2,3,4]
s1=pd.Series(arr)
s1

0    0
1    1
2    2
3    3
4    4
dtype: int64

In [8]:
order=[1,2,3,4,5]
s2=pd.Series(arr,index=order)
s2

1    0
2    1
3    2
4    3
5    4
dtype: int64

In [17]:
#creating a random ndarray
import numpy as np
n=np.random.randn(5)
index=['a','b','c','d','e']
s2=pd.Series(n, index=index)
s2

a   -0.056907
b   -0.964691
c   -0.634365
d    0.523601
e   -0.174943
dtype: float64

In [20]:
#creating a series from dictionary
d={'a':1,'b':2,'c':3,'d':4,'e':5}
s3=pd.Series(d)
s3

a    1
b    2
c    3
d    4
e    5
dtype: int64

In [22]:
#modifying the index of data
print(s1)
s1.index=['A','B','C','D','E']
s1

0    0
1    1
2    2
3    3
4    4
dtype: int64


A    0
B    1
C    2
D    3
E    4
dtype: int64

In [23]:
#slicing
a=s1[:3]
a

A    0
B    1
C    2
dtype: int64

In [25]:
a=s1[:-1]
a

A    0
B    1
C    2
D    3
dtype: int64

In [27]:
a=s1[2:]
a

C    2
D    3
E    4
dtype: int64

In [28]:
s3

a    1
b    2
c    3
d    4
e    5
dtype: int64

In [29]:
s4=s1.append(s3)
s4

A    0
B    1
C    2
D    3
E    4
a    1
b    2
c    3
d    4
e    5
dtype: int64

In [34]:
s4.drop('e')

A    0
B    1
C    2
D    3
E    4
a    1
b    2
c    3
d    4
dtype: int64

# Series Operation

In [39]:
arr1=[1,2,3,4,5,6,7]
arr2=[6,7,8,9,5]
s5=pd.Series(arr2)
s5

0    6
1    7
2    8
3    9
4    5
dtype: int64

In [40]:
s6=pd.Series(arr1)
s6

0    1
1    2
2    3
3    4
4    5
5    6
6    7
dtype: int64

In [41]:
s5.add(s6)

0     7.0
1     9.0
2    11.0
3    13.0
4    10.0
5     NaN
6     NaN
dtype: float64

In [42]:
s5.sub(s6)

0    5.0
1    5.0
2    5.0
3    5.0
4    0.0
5    NaN
6    NaN
dtype: float64

In [50]:
s7=s5.mul(s6)
s7

0     6.0
1    14.0
2    24.0
3    36.0
4    25.0
5     NaN
6     NaN
dtype: float64

In [45]:
s5.div(s6)

0    6.000000
1    3.500000
2    2.666667
3    2.250000
4    1.000000
5         NaN
6         NaN
dtype: float64

In [51]:
print('median', s7.median())
print('max', s7.max())
print('min',s7.min())

median 24.0
max 36.0
min 6.0


# Creating Dataframe

In [57]:
date=pd.date_range('today',periods=6)#Define time sequence as index
num_arr=np.random.randn(6,4)#import numpy random array
columns=['A','B','C','D'] #Use the table as the column name

df1= pd.DataFrame(num_arr, index=date, columns=columns)
df1

Unnamed: 0,A,B,C,D
2022-04-22 17:26:43.452748,-1.146305,0.140122,0.439783,-0.275906
2022-04-23 17:26:43.452748,0.445599,0.491467,0.557689,1.1583
2022-04-24 17:26:43.452748,-0.536935,0.743914,1.258614,-0.686345
2022-04-25 17:26:43.452748,-0.275115,0.402303,1.169165,0.450015
2022-04-26 17:26:43.452748,1.674932,0.039706,-0.810085,-0.107175
2022-04-27 17:26:43.452748,-0.459018,-0.418756,0.691352,0.082191


In [61]:
#create dataframe with dictionary array

data={'animal':['cat','dog','lion','cat','dog','rat','snake','rat','snake','dog'],
      'age':[2.5,3.0,np.nan,5,6,9,np.nan,5.6,1,1.5],
      'visits':[1,2,1,2,1,2,3,4,1,2],
      'priority':['yes','no','yes','yes','no','yes','no','yes','yes','no']}
labels=['a','b','c','d','e','f','g','h','i','j']

df2=pd.DataFrame(data,index=labels)
df2

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
b,dog,3.0,2,no
c,lion,,1,yes
d,cat,5.0,2,yes
e,dog,6.0,1,no
f,rat,9.0,2,yes
g,snake,,3,no
h,rat,5.6,4,yes
i,snake,1.0,1,yes
j,dog,1.5,2,no


In [63]:
#see datatypes of array
df2.dtypes

animal       object
age         float64
visits        int64
priority     object
dtype: object

In [69]:
df3=df2.head(6)
df3

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
b,dog,3.0,2,no
c,lion,,1,yes
d,cat,5.0,2,yes
e,dog,6.0,1,no
f,rat,9.0,2,yes


In [71]:
df2.tail(3)

Unnamed: 0,animal,age,visits,priority
h,rat,5.6,4,yes
i,snake,1.0,1,yes
j,dog,1.5,2,no


In [74]:
print(df2.index)
df2.columns

Index(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'], dtype='object')


Index(['animal', 'age', 'visits', 'priority'], dtype='object')

In [75]:
df2.values

array([['cat', 2.5, 1, 'yes'],
       ['dog', 3.0, 2, 'no'],
       ['lion', nan, 1, 'yes'],
       ['cat', 5.0, 2, 'yes'],
       ['dog', 6.0, 1, 'no'],
       ['rat', 9.0, 2, 'yes'],
       ['snake', nan, 3, 'no'],
       ['rat', 5.6, 4, 'yes'],
       ['snake', 1.0, 1, 'yes'],
       ['dog', 1.5, 2, 'no']], dtype=object)

In [79]:
#seeing stastical data of dataframe
df2.describe()

Unnamed: 0,age,visits
count,8.0,10.0
mean,4.2,1.9
std,2.692317,0.994429
min,1.0,1.0
25%,2.25,1.0
50%,4.0,2.0
75%,5.7,2.0
max,9.0,4.0


In [80]:
df2.T

Unnamed: 0,a,b,c,d,e,f,g,h,i,j
animal,cat,dog,lion,cat,dog,rat,snake,rat,snake,dog
age,2.5,3.0,,5.0,6.0,9.0,,5.6,1.0,1.5
visits,1,2,1,2,1,2,3,4,1,2
priority,yes,no,yes,yes,no,yes,no,yes,yes,no


In [81]:
df2.sort_values(by='age')

Unnamed: 0,animal,age,visits,priority
i,snake,1.0,1,yes
j,dog,1.5,2,no
a,cat,2.5,1,yes
b,dog,3.0,2,no
d,cat,5.0,2,yes
h,rat,5.6,4,yes
e,dog,6.0,1,no
f,rat,9.0,2,yes
c,lion,,1,yes
g,snake,,3,no


In [83]:
#slicing dataframe
df2[1:5]

Unnamed: 0,animal,age,visits,priority
b,dog,3.0,2,no
c,lion,,1,yes
d,cat,5.0,2,yes
e,dog,6.0,1,no


In [86]:
#query dataframe by tag
df2[['age','visits']]

Unnamed: 0,age,visits
a,2.5,1
b,3.0,2
c,,1
d,5.0,2
e,6.0,1
f,9.0,2
g,,3
h,5.6,4
i,1.0,1
j,1.5,2


In [88]:
#query row 2,3( iloc=integer location)
df2.iloc[1:3]

Unnamed: 0,animal,age,visits,priority
b,dog,3.0,2,no
c,lion,,1,yes


In [90]:
df3=df2.copy()
df3

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
b,dog,3.0,2,no
c,lion,,1,yes
d,cat,5.0,2,yes
e,dog,6.0,1,no
f,rat,9.0,2,yes
g,snake,,3,no
h,rat,5.6,4,yes
i,snake,1.0,1,yes
j,dog,1.5,2,no


In [91]:
df3.isnull()

Unnamed: 0,animal,age,visits,priority
a,False,False,False,False
b,False,False,False,False
c,False,True,False,False
d,False,False,False,False
e,False,False,False,False
f,False,False,False,False
g,False,True,False,False
h,False,False,False,False
i,False,False,False,False
j,False,False,False,False


In [93]:
df3.loc['f','age']=6.0 #(loc=location)
df3

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
b,dog,3.0,2,no
c,lion,,1,yes
d,cat,5.0,2,yes
e,dog,6.0,1,no
f,rat,6.0,2,yes
g,snake,,3,no
h,rat,5.6,4,yes
i,snake,1.0,1,yes
j,dog,1.5,2,no


In [109]:
df3.mean()

  df3.mean()


age       3.825
visits    1.900
dtype: float64

In [104]:
print(df3['visits'].sum())
print(df3['visits'].min())
df3['visits'].max()

19
1


4

In [105]:
df3.sum()

animal      catdoglioncatdogratsnakeratsnakedog
age                                        30.6
visits                                       19
priority             yesnoyesyesnoyesnoyesyesno
dtype: object

In [119]:
string=pd.Series(['A','B','C','Aaa',np.nan,'cow','owl','goat','bAA'])
string.str.lower()

0       a
1       b
2       c
3     aaa
4     NaN
5     cow
6     owl
7    goat
8     baa
dtype: object

# Oprations for DataFrame missing values

In [121]:
df4=df3.copy()
meanAge=df4['age'].mean()
df4['age'].fillna(meanAge)

a    2.500
b    3.000
c    3.825
d    5.000
e    6.000
f    6.000
g    3.825
h    5.600
i    1.000
j    1.500
Name: age, dtype: float64

In [126]:
df5=df3.copy()
df5.dropna(how='any')

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
b,dog,3.0,2,no
d,cat,5.0,2,yes
e,dog,6.0,1,no
f,rat,6.0,2,yes
h,rat,5.6,4,yes
i,snake,1.0,1,yes
j,dog,1.5,2,no


# DataFrame file oprations

In [127]:
df3.to_csv('animal.csv')

In [131]:
df_animal=pd.read_csv('animal.csv')
df_animal.head(3)

Unnamed: 0.1,Unnamed: 0,animal,age,visits,priority
0,a,cat,2.5,1,yes
1,b,dog,3.0,2,no
2,c,lion,,1,yes


In [142]:
df3.to_excel('animal.xlsx',sheet_name='Sheet1')
df_animal2=pd.read_excel('animal.xlsx','Sheet1', index_col=None, na_values=['NA'])
df_animal2

Unnamed: 0.1,Unnamed: 0,animal,age,visits,priority
0,a,cat,2.5,1,yes
1,b,dog,3.0,2,no
2,c,lion,,1,yes
3,d,cat,5.0,2,yes
4,e,dog,6.0,1,no
5,f,rat,6.0,2,yes
6,g,snake,,3,no
7,h,rat,5.6,4,yes
8,i,snake,1.0,1,yes
9,j,dog,1.5,2,no
