# Pandas

Pandas is an open-source, BSD-licensed library providing high-performance, easy-to-use data structures and data analysis tools for the Python programming language. Python with Pandas is used in a wide range of academic and commercial domains, including finance, economics, statistics, and analytics.

# Key Features of Pandas

#1.Tools for loading data from different file formats
#2.Handles missing values
#3.Reshaping and pivoting of data sets
#4.Merging and joining of data sets
#5.Fast and efficient tool for manipulating and analyzing data

# Types of Data Structures :
    Pandas provides two primary data structures:
    1. Series
    2. DataFrame

In [2]:
import numpy as np                      # NumPy, used below to build an array for a Series
import pandas as pd                     # pandas under its conventional alias

# Pandas -- Series

In [3]:
# Create an empty Series. The dtype is passed explicitly because relying on
# the default for an empty Series is deprecated in pandas 1.x (it emits a
# DeprecationWarning) and the default became ``object`` in pandas 2.0.
pd.Series(dtype="float64")

  """Entry point for launching an IPython kernel.


Series([], dtype: float64)

In [7]:
# Build a Series from a plain list. No index is supplied, so pandas assigns
# the default integer labels 0, 1, 2.
data = [1, 2, 3]
s = pd.Series(data=data)
print(s)

0    1
1    2
2    3
dtype: int64


In [6]:
# Build a Series from a tuple and label the values with the explicit index
# 100, 200, 500 instead of the default 0, 1, 2.
data = (1, 2, 2)
s = pd.Series(data=data, index=[100, 200, 500])
print(s)

100    1
200    2
500    2
dtype: int64


In [10]:
# Build a Series from a NumPy array of strings, labelled 100 through 103.
data = np.array(["a", "b", "c", "d"])
s = pd.Series(data=data, index=[100, 101, 102, 103])
print(s)

100    a
101    b
102    c
103    d
dtype: object


# Creating a Series from a dictionary

In [23]:
# Build a Series from a dict: the keys become the index labels and the
# values become the data.
data = {"a": 1, "b": 2, "c": 3}
a = pd.Series(data=data)
print(a)

a    1
b    2
c    3
dtype: int64


In [66]:
 # Repeat the scalar 8.0 for each of the four labels and store the values
 # as complex numbers.
 a = pd.Series(8.0, index=["a", "b", "c", "d"], dtype="complex")
 a

a    8.000000+0.000000j
b    8.000000+0.000000j
c    8.000000+0.000000j
d    8.000000+0.000000j
dtype: complex128

# DataFrames

In [72]:
# A DataFrame built with no data has no columns and no index entries.
a = pd.DataFrame(data=None)
print(a)


Empty DataFrame
Columns: []
Index: []


In [70]:
# A plain Python list (no pandas involved).
a = [1, 2]
print(a)

[1, 2]


# DataFrame From a List

In [80]:
# Each inner list becomes one row; with no labels supplied, rows and columns
# get default integer labels.
a = [
    [1, 2, 7, 5, 4],
    [4, 5, 6, 8, 5],
]
df = pd.DataFrame(data=a)
df

Unnamed: 0,0,1,2,3,4
0,1,2,7,5,4
1,4,5,6,8,5


In [81]:
import pandas as pd

# Rows of [name, age]; the ``columns`` argument supplies the header names.
data = [
    ["Alex", 10],
    ["Bob", 12],
    ["Clarke", 13],
]
df = pd.DataFrame(data=data, columns=["Name", "Age"])
print(df)

     Name  Age
0    Alex   10
1     Bob   12
2  Clarke   13


In [82]:
import pandas as pd

# Rows of [name, age]. Only the numeric Age column is converted to float.
# Passing dtype=float to the DataFrame constructor applies to every column,
# and pandas 2.0+ raises ValueError because the Name strings cannot be cast
# (older versions silently ignored the dtype for that column).
data = [['Alex', 10], ['Bob', 12], ['Clarke', 13]]
df = pd.DataFrame(data, columns=['Name', 'Age']).astype({'Age': float})
df

Unnamed: 0,Name,Age
0,Alex,10.0
1,Bob,12.0
2,Clarke,13.0


# DataFrame From a List of Dictionaries

In [83]:
import pandas as pd

# One dict per row; the keys become column names. The first row has no 'c'
# key, so that cell is left as a missing value.
data = [
    {"a": 1, "b": 2},
    {"a": 5, "b": 10, "c": 20},
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,a,b,c
0,1,2,
1,5,10,20.0


In [84]:
import pandas as pd

# Same list-of-dicts input, but the rows are labelled 'first' and 'second'
# instead of the default 0 and 1.
data = [
    {"a": 1, "b": 2},
    {"a": 5, "b": 10, "c": 20},
]
df = pd.DataFrame(data=data, index=["first", "second"])
df

Unnamed: 0,a,b,c
first,1,2,
second,5,10,20.0


# Working with text data

In [86]:
# A small Series of strings with the default integer index.
s = pd.Series(data=["Tom", "Jerry", "Spike"])
s

0      Tom
1    Jerry
2    Spike
dtype: object

In [87]:
# Lower-case every string in the Series, element-wise, via the .str accessor.
s.str.lower()

0      tom
1    jerry
2    spike
dtype: object

In [88]:
# Upper-case every string in the Series, element-wise, via the .str accessor.
s.str.upper()

0      TOM
1    JERRY
2    SPIKE
dtype: object

In [89]:
# Sample strings of differing lengths, wrapped in a Series.
data = ["Cuite pie", "Sweetie Pie", "Hottie", "Sexy"]
s = pd.Series(data=data)
s

0      Cuite pie
1    Sweetie Pie
2         Hottie
3           Sexy
dtype: object

In [92]:
# Number of items in the plain Python list `data` (built-in len, not pandas).
len(data)

4

In [34]:
# Length of each string in the Series, computed element-wise.
s.str.len()

0     9
1    11
2     6
3     4
dtype: int64

In [94]:
# Build a 0/1 indicator column for each distinct string in the Series.
s.str.get_dummies()

Unnamed: 0,Cuite pie,Hottie,Sexy,Sweetie Pie
0,1,0,0,0
1,0,0,0,1
2,0,1,0,0
3,0,0,1,0


In [98]:
# Count occurrences of 'z' in each string; none of the samples contain it,
# so every count is 0.
s.str.count('z')

0    0
1    0
2    0
3    0
dtype: int64

# Merging/Joining

In [100]:
import pandas as pd
# Two frames with the same column layout (id, Name, subject_id), printed one
# after the other.
left = pd.DataFrame(
    data={
        "id": [1, 2, 3, 4, 5],
        "Name": ["Alex", "Amy", "Allen", "Alice", "Ayoung"],
        "subject_id": ["sub1", "sub2", "sub4", "sub6", "sub5"],
    }
)
right = pd.DataFrame(
    data={
        "id": [1, 2, 3, 4, 5],
        "Name": ["Billy", "Brian", "Bran", "Bryce", "Betty"],
        "subject_id": ["sub2", "sub4", "sub3", "sub6", "sub5"],
    }
)
print(left, right, sep="\n")

   id    Name subject_id
0   1    Alex       sub1
1   2     Amy       sub2
2   3   Allen       sub4
3   4   Alice       sub6
4   5  Ayoung       sub5
   id   Name subject_id
0   1  Billy       sub2
1   2  Brian       sub4
2   3   Bran       sub3
3   4  Bryce       sub6
4   5  Betty       sub5


In [None]:
# NOTE(review): unexecuted placeholder cell. pd.merge requires `left` and
# `right` arguments, so this bare call raises TypeError if it is run.
pd.merge()

In [56]:
import pandas as pd

# Rebuild both frames, then join them on the shared 'id' column. The other
# overlapping columns (Name, subject_id) receive _x / _y suffixes.
left = pd.DataFrame(
    data={
        "id": [1, 2, 3, 4, 5],
        "Name": ["Alex", "Amy", "Allen", "Alice", "Ayoung"],
        "subject_id": ["sub1", "sub2", "sub4", "sub6", "sub5"],
    }
)
right = pd.DataFrame(
    data={
        "id": [1, 2, 3, 4, 5],
        "Name": ["Billy", "Brian", "Bran", "Bryce", "Betty"],
        "subject_id": ["sub2", "sub4", "sub3", "sub6", "sub5"],
    }
)
pd.merge(left=left, right=right, on="id")

Unnamed: 0,id,Name_x,subject_id_x,Name_y,subject_id_y
0,1,Alex,sub1,Billy,sub2
1,2,Amy,sub2,Brian,sub4
2,3,Allen,sub4,Bran,sub3
3,4,Alice,sub6,Bryce,sub6
4,5,Ayoung,sub5,Betty,sub5
