###  Pandas Complete Guide
https://pandas.pydata.org/docs/pandas.pdf

## Importing pandas 

In [1]:
import pandas as pd 

## Reading csv file 

In [8]:
# NOTE(review): this rebinds `pd` from the pandas module to the loaded DataFrame.
# The next cells (`pd`, `pd.head(20)`, `pd.tail()`) rely on that rebinding, but any
# later `pd.read_csv(...)` call only works once `import pandas as pd` has been run again.
pd = pd.read_csv('E:/Downloads/Player.csv')

In [9]:
pd

Unnamed: 0,Player_Id,Player_Name,DOB,Batting_Hand,Bowling_Skill,Country,Is_Umpire,Unnamed: 7
0,1,SC Ganguly,8-Jul-72,Left_Hand,Right-arm medium,India,0,
1,2,BB McCullum,27-Sep-81,Right_Hand,Right-arm medium,New Zealand,0,
2,3,RT Ponting,19-Dec-74,Right_Hand,Right-arm medium,Australia,0,
3,4,DJ Hussey,15-Jul-77,Right_Hand,Right-arm offbreak,Australia,0,
4,5,Mohammad Hafeez,17-Oct-80,Right_Hand,Right-arm offbreak,Pakistan,0,
...,...,...,...,...,...,...,...,...
518,519,Subroto Das,,,,India,1,
519,520,K Srinivasan,,,,India,1,
520,521,VK Sharma,,,,India,1,
521,523,AV Wankhade,14-Mar-92,Right_Hand,,India,0,


## Print top 20 rows :

#### By default head() will give top 5 records

In [12]:
pd.head(20)

Unnamed: 0,Player_Id,Player_Name,DOB,Batting_Hand,Bowling_Skill,Country,Is_Umpire,Unnamed: 7
0,1,SC Ganguly,8-Jul-72,Left_Hand,Right-arm medium,India,0,
1,2,BB McCullum,27-Sep-81,Right_Hand,Right-arm medium,New Zealand,0,
2,3,RT Ponting,19-Dec-74,Right_Hand,Right-arm medium,Australia,0,
3,4,DJ Hussey,15-Jul-77,Right_Hand,Right-arm offbreak,Australia,0,
4,5,Mohammad Hafeez,17-Oct-80,Right_Hand,Right-arm offbreak,Pakistan,0,
5,6,R Dravid,11-Jan-73,Right_Hand,Right-arm offbreak,India,0,
6,7,W Jaffer,16-Feb-78,Right_Hand,Right-arm offbreak,India,0,
7,8,V Kohli,5-Nov-88,Right_Hand,Right-arm medium,India,0,
8,9,JH Kallis,16-Oct-75,Right_Hand,Right-arm fast-medium,South Africa,0,
9,10,CL White,18-Aug-83,Right_Hand,Legbreak googly,Australia,0,


## Print last 5 records :

In [13]:
pd.tail()

Unnamed: 0,Player_Id,Player_Name,DOB,Batting_Hand,Bowling_Skill,Country,Is_Umpire,Unnamed: 7
518,519,Subroto Das,,,,India,1,
519,520,K Srinivasan,,,,India,1,
520,521,VK Sharma,,,,India,1,
521,523,AV Wankhade,14-Mar-92,Right_Hand,,India,0,
522,524,B Aparajith,8-Jul-94,Right_Hand,Right-arm offbreak,India,0,


In [17]:
# An earlier cell rebinds `pd` to a DataFrame (`pd = pd.read_csv(...)`), so on a
# top-to-bottom run `pd.read_csv` would raise AttributeError here. Re-import pandas
# to restore the module before reading the file again.
import pandas as pd

df = pd.read_csv('E:/Downloads/Player.csv')

In [18]:
type(df)

pandas.core.frame.DataFrame

## Storing top 5 records of previous file into another csv file

In [22]:
# Keep only the first five rows (the default size of head()) for the export below.
df1 = df.head(n=5)

In [20]:
df.tail()

Unnamed: 0,Player_Id,Player_Name,DOB,Batting_Hand,Bowling_Skill,Country,Is_Umpire,Unnamed: 7
518,519,Subroto Das,,,,India,1,
519,520,K Srinivasan,,,,India,1,
520,521,VK Sharma,,,,India,1,
521,523,AV Wankhade,14-Mar-92,Right_Hand,,India,0,
522,524,B Aparajith,8-Jul-94,Right_Hand,Right-arm offbreak,India,0,


In [23]:
# to_csv writes the row index as an extra unnamed first column by default; that is why
# reading test.csv back shows an additional `Unnamed: 0` column. Pass index=False to omit it.
df1.to_csv("test.csv")

In [26]:
pd.read_csv('test.csv')

Unnamed: 0.1,Unnamed: 0,Player_Id,Player_Name,DOB,Batting_Hand,Bowling_Skill,Country,Is_Umpire,Unnamed: 7
0,0,1,SC Ganguly,8-Jul-72,Left_Hand,Right-arm medium,India,0,
1,1,2,BB McCullum,27-Sep-81,Right_Hand,Right-arm medium,New Zealand,0,
2,2,3,RT Ponting,19-Dec-74,Right_Hand,Right-arm medium,Australia,0,
3,3,4,DJ Hussey,15-Jul-77,Right_Hand,Right-arm offbreak,Australia,0,
4,4,5,Mohammad Hafeez,17-Oct-80,Right_Hand,Right-arm offbreak,Pakistan,0,


### By default, read_csv reads comma-separated files. To read data separated by any other delimiter, we have to specify it explicitly with 'sep'

### If we pass header=None, read_csv does not treat any row as the header. Instead, the columns are labelled with integer positions (0, 1, 2, ...)

### If we don't want the inbuilt header we can specify our own column names by using 'names'

### To skip unwanted rows while reading, pass their row numbers to 'skiprows'

In [39]:
# Re-read the file with '#' as the delimiter and our own column labels:
#   header=None -> no line of the file is used as column names
#   names       -> the labels to use instead
#   skiprows    -> 0-based line numbers dropped while reading (line 0 is the file's header line)
df = pd.read_csv(
    'E:/Downloads/Player.csv',
    sep='#',
    header=None,
    names=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'],
    skiprows=[0, 4, 6, 8],
)

In [40]:
df

Unnamed: 0,a,b,c,d,e,f,g,h
0,1,SC Ganguly,8-Jul-72,Left_Hand,Right-arm medium,India,0,
1,2,BB McCullum,27-Sep-81,Right_Hand,Right-arm medium,New Zealand,0,
2,3,RT Ponting,19-Dec-74,Right_Hand,Right-arm medium,Australia,0,
3,5,Mohammad Hafeez,17-Oct-80,Right_Hand,Right-arm offbreak,Pakistan,0,
4,7,W Jaffer,16-Feb-78,Right_Hand,Right-arm offbreak,India,0,
...,...,...,...,...,...,...,...,...
515,519,Subroto Das,,,,India,1,
516,520,K Srinivasan,,,,India,1,
517,521,VK Sharma,,,,India,1,
518,523,AV Wankhade,14-Mar-92,Right_Hand,,India,0,


### To convert specific values into NaN while reading, we can use 'na_values'

In [50]:
# Treat these two player names as missing values: matching cells are loaded as NaN.
df = pd.read_csv(
    'E:/Downloads/Player.csv',
    sep='#',
    na_values=['SC Ganguly', 'BB McCullum'],
)

In [51]:
df

Unnamed: 0,Player_Id,Player_Name,DOB,Batting_Hand,Bowling_Skill,Country,Is_Umpire,Unnamed: 7
0,1,,8-Jul-72,Left_Hand,Right-arm medium,India,0,
1,2,,27-Sep-81,Right_Hand,Right-arm medium,New Zealand,0,
2,3,RT Ponting,19-Dec-74,Right_Hand,Right-arm medium,Australia,0,
3,4,DJ Hussey,15-Jul-77,Right_Hand,Right-arm offbreak,Australia,0,
4,5,Mohammad Hafeez,17-Oct-80,Right_Hand,Right-arm offbreak,Pakistan,0,
...,...,...,...,...,...,...,...,...
518,519,Subroto Das,,,,India,1,
519,520,K Srinivasan,,,,India,1,
520,521,VK Sharma,,,,India,1,
521,523,AV Wankhade,14-Mar-92,Right_Hand,,India,0,


In [52]:
df = pd.read_csv('E:/Downloads/Player.csv',sep='#')

In [53]:
df

Unnamed: 0,Player_Id,Player_Name,DOB,Batting_Hand,Bowling_Skill,Country,Is_Umpire,Unnamed: 7
0,1,SC Ganguly,8-Jul-72,Left_Hand,Right-arm medium,India,0,
1,2,BB McCullum,27-Sep-81,Right_Hand,Right-arm medium,New Zealand,0,
2,3,RT Ponting,19-Dec-74,Right_Hand,Right-arm medium,Australia,0,
3,4,DJ Hussey,15-Jul-77,Right_Hand,Right-arm offbreak,Australia,0,
4,5,Mohammad Hafeez,17-Oct-80,Right_Hand,Right-arm offbreak,Pakistan,0,
...,...,...,...,...,...,...,...,...
518,519,Subroto Das,,,,India,1,
519,520,K Srinivasan,,,,India,1,
520,521,VK Sharma,,,,India,1,
521,523,AV Wankhade,14-Mar-92,Right_Hand,,India,0,


In [54]:
# skiprows=[0, 1, 2] also drops line 0 (the header line), so the first remaining line
# (the "RT Ponting" record) is used as the column names -- see the output below.
df1 = pd.read_csv('E:/Downloads/Player.csv',sep='#',skiprows=[0,1,2])

In [55]:
df1

Unnamed: 0,3,RT Ponting,19-Dec-74,Right_Hand,Right-arm medium,Australia,0,Unnamed: 7
0,4,DJ Hussey,15-Jul-77,Right_Hand,Right-arm offbreak,Australia,0,
1,5,Mohammad Hafeez,17-Oct-80,Right_Hand,Right-arm offbreak,Pakistan,0,
2,6,R Dravid,11-Jan-73,Right_Hand,Right-arm offbreak,India,0,
3,7,W Jaffer,16-Feb-78,Right_Hand,Right-arm offbreak,India,0,
4,8,V Kohli,5-Nov-88,Right_Hand,Right-arm medium,India,0,
...,...,...,...,...,...,...,...,...
515,519,Subroto Das,,,,India,1,
516,520,K Srinivasan,,,,India,1,
517,521,VK Sharma,,,,India,1,
518,523,AV Wankhade,14-Mar-92,Right_Hand,,India,0,


In [56]:
df

Unnamed: 0,Player_Id,Player_Name,DOB,Batting_Hand,Bowling_Skill,Country,Is_Umpire,Unnamed: 7
0,1,SC Ganguly,8-Jul-72,Left_Hand,Right-arm medium,India,0,
1,2,BB McCullum,27-Sep-81,Right_Hand,Right-arm medium,New Zealand,0,
2,3,RT Ponting,19-Dec-74,Right_Hand,Right-arm medium,Australia,0,
3,4,DJ Hussey,15-Jul-77,Right_Hand,Right-arm offbreak,Australia,0,
4,5,Mohammad Hafeez,17-Oct-80,Right_Hand,Right-arm offbreak,Pakistan,0,
...,...,...,...,...,...,...,...,...
518,519,Subroto Das,,,,India,1,
519,520,K Srinivasan,,,,India,1,
520,521,VK Sharma,,,,India,1,
521,523,AV Wankhade,14-Mar-92,Right_Hand,,India,0,


In [63]:
df.head()

Unnamed: 0,Player_Id,Player_Name,DOB,Batting_Hand,Bowling_Skill,Country,Is_Umpire,Unnamed: 7
0,1,SC Ganguly,8-Jul-72,Left_Hand,Right-arm medium,India,0,
1,2,BB McCullum,27-Sep-81,Right_Hand,Right-arm medium,New Zealand,0,
2,3,RT Ponting,19-Dec-74,Right_Hand,Right-arm medium,Australia,0,
3,4,DJ Hussey,15-Jul-77,Right_Hand,Right-arm offbreak,Australia,0,
4,5,Mohammad Hafeez,17-Oct-80,Right_Hand,Right-arm offbreak,Pakistan,0,


## Gives summary statistics (count, mean, std, min, quartiles, max) for the numeric columns of the dataset :

In [58]:
df.describe()

Unnamed: 0,Player_Id,Is_Umpire,Unnamed: 7
count,523.0,523.0,12.0
mean,262.003824,0.099426,6.5
std,151.127981,0.29952,3.605551
min,1.0,0.0,1.0
25%,131.5,0.0,3.75
50%,262.0,0.0,6.5
75%,392.5,0.0,9.25
max,524.0,1.0,12.0


## Gives the datatype of each column in our dataset :

In [61]:
df1.dtypes

3                     int64
RT Ponting           object
19-Dec-74            object
Right_Hand           object
Right-arm medium     object
Australia            object
0                     int64
Unnamed: 7          float64
dtype: object

In [66]:
df1 = pd.read_csv('E:/Downloads/Player.csv',sep='#')

In [67]:
df1

Unnamed: 0,Player_Id,Player_Name,DOB,Batting_Hand,Bowling_Skill,Country,Is_Umpire,Unnamed: 7
0,1,SC Ganguly,8-Jul-72,Left_Hand,Right-arm medium,India,0,
1,2,BB McCullum,27-Sep-81,Right_Hand,Right-arm medium,New Zealand,0,
2,3,RT Ponting,19-Dec-74,Right_Hand,Right-arm medium,Australia,0,
3,4,DJ Hussey,15-Jul-77,Right_Hand,Right-arm offbreak,Australia,0,
4,5,Mohammad Hafeez,17-Oct-80,Right_Hand,Right-arm offbreak,Pakistan,0,
...,...,...,...,...,...,...,...,...
518,519,Subroto Das,,,,India,1,
519,520,K Srinivasan,,,,India,1,
520,521,VK Sharma,,,,India,1,
521,523,AV Wankhade,14-Mar-92,Right_Hand,,India,0,


In [75]:
# Export only the Player_Id and DOB columns, '#'-delimited, without the row index.
df1.to_csv(
    "result3.csv",
    sep="#",
    columns=['Player_Id', 'DOB'],
    index=False,
)

In [76]:
df = pd.read_csv('E:/Downloads/Player.csv',sep='#')

In [77]:
df

Unnamed: 0,Player_Id,Player_Name,DOB,Batting_Hand,Bowling_Skill,Country,Is_Umpire,Unnamed: 7
0,1,SC Ganguly,8-Jul-72,Left_Hand,Right-arm medium,India,0,
1,2,BB McCullum,27-Sep-81,Right_Hand,Right-arm medium,New Zealand,0,
2,3,RT Ponting,19-Dec-74,Right_Hand,Right-arm medium,Australia,0,
3,4,DJ Hussey,15-Jul-77,Right_Hand,Right-arm offbreak,Australia,0,
4,5,Mohammad Hafeez,17-Oct-80,Right_Hand,Right-arm offbreak,Pakistan,0,
...,...,...,...,...,...,...,...,...
518,519,Subroto Das,,,,India,1,
519,520,K Srinivasan,,,,India,1,
520,521,VK Sharma,,,,India,1,
521,523,AV Wankhade,14-Mar-92,Right_Hand,,India,0,


In [78]:
df.tail()

Unnamed: 0,Player_Id,Player_Name,DOB,Batting_Hand,Bowling_Skill,Country,Is_Umpire,Unnamed: 7
518,519,Subroto Das,,,,India,1,
519,520,K Srinivasan,,,,India,1,
520,521,VK Sharma,,,,India,1,
521,523,AV Wankhade,14-Mar-92,Right_Hand,,India,0,
522,524,B Aparajith,8-Jul-94,Right_Hand,Right-arm offbreak,India,0,


## Print only column names 

In [79]:
df.columns

Index(['Player_Id', 'Player_Name', 'DOB', 'Batting_Hand', 'Bowling_Skill',
       'Country', 'Is_Umpire', 'Unnamed: 7'],
      dtype='object')

### If we pass a single column name as a plain string, the result is returned as a Series

In [81]:
type(df['Player_Id'])

pandas.core.series.Series

### If we pass the column name inside a list, the result is returned as a DataFrame

In [83]:
type(df[['Player_Id']])

pandas.core.frame.DataFrame

## Slicing

### To select more than one column, we should pass the column names in a list; otherwise it will throw an error

In [86]:
df[['Player_Id', 'Batting_Hand']]

Unnamed: 0,Player_Id,Batting_Hand
0,1,Left_Hand
1,2,Right_Hand
2,3,Right_Hand
3,4,Right_Hand
4,5,Right_Hand
...,...,...
518,519,
519,520,
520,521,
521,523,Right_Hand


In [87]:
df[['Is_Umpire' , 'Player_Id' , 'Batting_Hand']]

Unnamed: 0,Is_Umpire,Player_Id,Batting_Hand
0,0,1,Left_Hand
1,0,2,Right_Hand
2,0,3,Right_Hand
3,0,4,Right_Hand
4,0,5,Right_Hand
...,...,...,...
518,1,519,
519,1,520,
520,1,521,
521,0,523,Right_Hand


## Reading an excel sheet

In [91]:
# Load a single worksheet, selected by name, from the Excel workbook.
df2 = pd.read_excel(
    'E:/Downloads/sample-excel-master/LUSID Excel - Business Agility - Making Simple Changes Quickly & Easily.xlsx',
    sheet_name='Create your instrument universe',
)

## Reading html page

In [None]:
!pip install html5lib
!pip install lxml
!pip install beautifulSoup4



In [14]:
import html5lib
import lxml
from bs4 import BeautifulSoup

In [None]:
# Dataset reference: https://www.kaggle.com/alampalsingh/qualification-status-dataset

In [20]:
df5 = pd.read_html("https://pbpython.com/pandas-html-table.html")

In [7]:
df4 = pd.read_html("https://www.basketball-reference.com/players/i/ibakase01.html?utm_source=direct&utm_medium=Share&utm_campaign=ShareTool")

In [None]:
df4

In [9]:
type(df4)

list

## Gives the number of tables found in the HTML page that we have read

In [6]:
len(df4) 

6

In [21]:
df5[0]

Unnamed: 0.1,Unnamed: 0,Year,Office,GOP,DFL,Others
0,0,2018,Governor,42.4%,53.9%,3.7%
1,1,2018,Senator,36.2%,60.3%,3.4%
2,2,2018,Senator,42.4%,53.0%,4.6%
3,3,2016,President,44.9%,46.4%,8.6%
4,4,2014,Governor,44.5%,50.1%,5.4%


In [99]:
df4[3].to_csv("players_data.csv")

In [101]:
# Keep the URL on one line in a regular string: a triple-quoted literal that wraps onto
# the next line would embed a trailing newline in the URL passed to read_html.
df9 = pd.read_html("https://www.basketball-reference.com/players/i/ibakase01.html?utm_source=direct&utm_medium=Share&utm_campaign=ShareTool")


In [None]:
df9[0]

## To create different CSV files corresponding to each table in html page

In [103]:
# Read every table on the page, then write each one to its own CSV file
# (html_data0.csv, html_data1.csv, ...), without the row index.
html_data = pd.read_html("https://www.basketball-reference.com/players/i/ibakase01.html?utm_source=direct&utm_medium=Share&utm_campaign=ShareTool")
for table_number, table in enumerate(html_data):
    table.to_csv(f"html_data{table_number}.csv", index=False)


## How to read a dataset hosted on GitHub (use the raw file URL)

In [None]:
# Raw file URL: https://raw.githubusercontent.com/BharadwajEdera/Python-Module/master/Python%20Classes/Fruits.csv

In [22]:
pd.read_csv("https://raw.githubusercontent.com/BharadwajEdera/Python-Module/master/Python%20Classes/Fruits.csv")

Unnamed: 0,Id,Name,Price,Stock
0,1,Apple,0.4,30
1,2,Avacado,0.5,25
2,3,Banana,0.6,20
3,4,Blackberry,0.4,50
4,5,Cherry,0.5,40
5,6,Grape,0.7,15
6,7,Kiwi,0.6,15
7,8,Lemon,0.4,25
8,9,Mango,0.5,30
9,10,Orange,0.6,10


In [8]:
pd.read_csv("https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv")

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


## Reading json data

#### We can cross verify our json data format by checking it on json viewer !!!!

http://jsonviewer.stack.hu/

In [23]:
s = """{
"name" :"sudh",
"phone_no" : 45454545,
"comp":["iNeuron", "EY" , "unilver" , "verizon" , "deloitte" , "wipro"]
}
"""

In [24]:
s

'{\n"name" :"sudh",\n"phone_no" : 45454545,\n"comp":["iNeuron", "EY" , "unilver" , "verizon" , "deloitte" , "wipro"]\n}\n'

In [10]:
import json 

In [11]:
s1 = json.loads(s)

In [12]:
s1

{'name': 'sudh',
 'phone_no': 45454545,
 'comp': ['iNeuron', 'EY', 'unilver', 'verizon', 'deloitte', 'wipro']}

## Converting json data into tabular format using pandas

In [13]:
pd.DataFrame(s1)

Unnamed: 0,name,phone_no,comp
0,sudh,45454545,iNeuron
1,sudh,45454545,EY
2,sudh,45454545,unilver
3,sudh,45454545,verizon
4,sudh,45454545,deloitte
5,sudh,45454545,wipro


In [130]:
pd.DataFrame(s1,columns=['comp'])

Unnamed: 0,comp
0,iNeuron
1,EY
2,unilver
3,verizon
4,deloitte
5,wipro


## Loading complex json data

In [14]:
import requests

In [15]:
res = requests.get('https://api.github.com/repos/pandas-dev/pandas/issues')

In [16]:
res 

<Response [200]>

In [17]:
js = res.json()

In [None]:
js

In [19]:
js[0]['title']

'ENH: Using transparentpath in read_csv'

In [20]:
# Print the title of every issue in the parsed response.
for issue in js:
    print(issue['title'])

ENH: Using transparentpath in read_csv
ENH: DataFrame.to_expr() method
_libs.hashtable.ismember incorrect with tuples containing nan
BUG: Odd timezone offset change with old datetimes with tz_convert
REF: de-duplicate symmetric_difference, _union
REF: de-duplicate IntervalIndex setops
BUG: IntervalIndex([np.nan, np.nan]).is_monotonic returns True
TYP: datetimelike
CI: Re-enable Circle CI
ENH: retain attrs when concat dataframes
BUG: DataFrame `isin` function randomly selects values (instead of using them all) when on Intel CPU and input is a torch Tensor
ENH: Allow different senses of "distance" for method=nearest indexing
REF: share wrapping in MultiIndex setops
REF: de-duplicate Index._intersection + MultiIndex._intersection
BUG: MultiIndex.unique incorrect when NA value is present
DOC: Whatsnew1.3 grammar
BUG: wrong exception when slicing on TimedeltaIndex
BUG: read_csv does not read double double quotes in pipe delimited txt file
DOC/BLD: update README.md, list setuptools as depend

In [21]:
len(js)

30

In [None]:
js

In [149]:
# NOTE(review): this rebinds `df4`, which an earlier cell assigned to the list of tables
# returned by pd.read_html; from here on `df4` is a DataFrame built from the issues JSON.
df4 = pd.DataFrame(js)

In [150]:
df4.to_csv('json_dump.csv')

In [152]:
js[0]['user']

{'login': 'jbrockmendel',
 'id': 8078968,
 'node_id': 'MDQ6VXNlcjgwNzg5Njg=',
 'avatar_url': 'https://avatars.githubusercontent.com/u/8078968?v=4',
 'gravatar_id': '',
 'url': 'https://api.github.com/users/jbrockmendel',
 'html_url': 'https://github.com/jbrockmendel',
 'followers_url': 'https://api.github.com/users/jbrockmendel/followers',
 'following_url': 'https://api.github.com/users/jbrockmendel/following{/other_user}',
 'gists_url': 'https://api.github.com/users/jbrockmendel/gists{/gist_id}',
 'starred_url': 'https://api.github.com/users/jbrockmendel/starred{/owner}{/repo}',
 'subscriptions_url': 'https://api.github.com/users/jbrockmendel/subscriptions',
 'organizations_url': 'https://api.github.com/users/jbrockmendel/orgs',
 'repos_url': 'https://api.github.com/users/jbrockmendel/repos',
 'events_url': 'https://api.github.com/users/jbrockmendel/events{/privacy}',
 'received_events_url': 'https://api.github.com/users/jbrockmendel/received_events',
 'type': 'User',
 'site_admin': False}

## Converting json to tabular format

In [None]:
pd.read_json("https://api.github.com/repos/pandas-dev/pandas/issues")


## From the dataset extract the user key and convert into tabular format

In [154]:
# Collect the nested 'user' dict of every issue, then build one row per entry.
lis = [issue['user'] for issue in js]
pd.DataFrame(lis)


Unnamed: 0,login,id,node_id,avatar_url,gravatar_id,url,html_url,followers_url,following_url,gists_url,starred_url,subscriptions_url,organizations_url,repos_url,events_url,received_events_url,type,site_admin
0,jbrockmendel,8078968,MDQ6VXNlcjgwNzg5Njg=,https://avatars.githubusercontent.com/u/807896...,,https://api.github.com/users/jbrockmendel,https://github.com/jbrockmendel,https://api.github.com/users/jbrockmendel/foll...,https://api.github.com/users/jbrockmendel/foll...,https://api.github.com/users/jbrockmendel/gist...,https://api.github.com/users/jbrockmendel/star...,https://api.github.com/users/jbrockmendel/subs...,https://api.github.com/users/jbrockmendel/orgs,https://api.github.com/users/jbrockmendel/repos,https://api.github.com/users/jbrockmendel/even...,https://api.github.com/users/jbrockmendel/rece...,User,False
1,jbrockmendel,8078968,MDQ6VXNlcjgwNzg5Njg=,https://avatars.githubusercontent.com/u/807896...,,https://api.github.com/users/jbrockmendel,https://github.com/jbrockmendel,https://api.github.com/users/jbrockmendel/foll...,https://api.github.com/users/jbrockmendel/foll...,https://api.github.com/users/jbrockmendel/gist...,https://api.github.com/users/jbrockmendel/star...,https://api.github.com/users/jbrockmendel/subs...,https://api.github.com/users/jbrockmendel/orgs,https://api.github.com/users/jbrockmendel/repos,https://api.github.com/users/jbrockmendel/even...,https://api.github.com/users/jbrockmendel/rece...,User,False
2,jbrockmendel,8078968,MDQ6VXNlcjgwNzg5Njg=,https://avatars.githubusercontent.com/u/807896...,,https://api.github.com/users/jbrockmendel,https://github.com/jbrockmendel,https://api.github.com/users/jbrockmendel/foll...,https://api.github.com/users/jbrockmendel/foll...,https://api.github.com/users/jbrockmendel/gist...,https://api.github.com/users/jbrockmendel/star...,https://api.github.com/users/jbrockmendel/subs...,https://api.github.com/users/jbrockmendel/orgs,https://api.github.com/users/jbrockmendel/repos,https://api.github.com/users/jbrockmendel/even...,https://api.github.com/users/jbrockmendel/rece...,User,False
3,rhshadrach,45562402,MDQ6VXNlcjQ1NTYyNDAy,https://avatars.githubusercontent.com/u/455624...,,https://api.github.com/users/rhshadrach,https://github.com/rhshadrach,https://api.github.com/users/rhshadrach/followers,https://api.github.com/users/rhshadrach/follow...,https://api.github.com/users/rhshadrach/gists{...,https://api.github.com/users/rhshadrach/starre...,https://api.github.com/users/rhshadrach/subscr...,https://api.github.com/users/rhshadrach/orgs,https://api.github.com/users/rhshadrach/repos,https://api.github.com/users/rhshadrach/events...,https://api.github.com/users/rhshadrach/receiv...,User,False
4,jbrockmendel,8078968,MDQ6VXNlcjgwNzg5Njg=,https://avatars.githubusercontent.com/u/807896...,,https://api.github.com/users/jbrockmendel,https://github.com/jbrockmendel,https://api.github.com/users/jbrockmendel/foll...,https://api.github.com/users/jbrockmendel/foll...,https://api.github.com/users/jbrockmendel/gist...,https://api.github.com/users/jbrockmendel/star...,https://api.github.com/users/jbrockmendel/subs...,https://api.github.com/users/jbrockmendel/orgs,https://api.github.com/users/jbrockmendel/repos,https://api.github.com/users/jbrockmendel/even...,https://api.github.com/users/jbrockmendel/rece...,User,False
5,architsingh15,15556124,MDQ6VXNlcjE1NTU2MTI0,https://avatars.githubusercontent.com/u/155561...,,https://api.github.com/users/architsingh15,https://github.com/architsingh15,https://api.github.com/users/architsingh15/fol...,https://api.github.com/users/architsingh15/fol...,https://api.github.com/users/architsingh15/gis...,https://api.github.com/users/architsingh15/sta...,https://api.github.com/users/architsingh15/sub...,https://api.github.com/users/architsingh15/orgs,https://api.github.com/users/architsingh15/repos,https://api.github.com/users/architsingh15/eve...,https://api.github.com/users/architsingh15/rec...,User,False
6,fangchenli,7614606,MDQ6VXNlcjc2MTQ2MDY=,https://avatars.githubusercontent.com/u/761460...,,https://api.github.com/users/fangchenli,https://github.com/fangchenli,https://api.github.com/users/fangchenli/followers,https://api.github.com/users/fangchenli/follow...,https://api.github.com/users/fangchenli/gists{...,https://api.github.com/users/fangchenli/starre...,https://api.github.com/users/fangchenli/subscr...,https://api.github.com/users/fangchenli/orgs,https://api.github.com/users/fangchenli/repos,https://api.github.com/users/fangchenli/events...,https://api.github.com/users/fangchenli/receiv...,User,False
7,TheNeuralBit,675055,MDQ6VXNlcjY3NTA1NQ==,https://avatars.githubusercontent.com/u/675055...,,https://api.github.com/users/TheNeuralBit,https://github.com/TheNeuralBit,https://api.github.com/users/TheNeuralBit/foll...,https://api.github.com/users/TheNeuralBit/foll...,https://api.github.com/users/TheNeuralBit/gist...,https://api.github.com/users/TheNeuralBit/star...,https://api.github.com/users/TheNeuralBit/subs...,https://api.github.com/users/TheNeuralBit/orgs,https://api.github.com/users/TheNeuralBit/repos,https://api.github.com/users/TheNeuralBit/even...,https://api.github.com/users/TheNeuralBit/rece...,User,False
8,fangchenli,7614606,MDQ6VXNlcjc2MTQ2MDY=,https://avatars.githubusercontent.com/u/761460...,,https://api.github.com/users/fangchenli,https://github.com/fangchenli,https://api.github.com/users/fangchenli/followers,https://api.github.com/users/fangchenli/follow...,https://api.github.com/users/fangchenli/gists{...,https://api.github.com/users/fangchenli/starre...,https://api.github.com/users/fangchenli/subscr...,https://api.github.com/users/fangchenli/orgs,https://api.github.com/users/fangchenli/repos,https://api.github.com/users/fangchenli/events...,https://api.github.com/users/fangchenli/receiv...,User,False
9,mwaddoups,11441393,MDQ6VXNlcjExNDQxMzkz,https://avatars.githubusercontent.com/u/114413...,,https://api.github.com/users/mwaddoups,https://github.com/mwaddoups,https://api.github.com/users/mwaddoups/followers,https://api.github.com/users/mwaddoups/followi...,https://api.github.com/users/mwaddoups/gists{/...,https://api.github.com/users/mwaddoups/starred...,https://api.github.com/users/mwaddoups/subscri...,https://api.github.com/users/mwaddoups/orgs,https://api.github.com/users/mwaddoups/repos,https://api.github.com/users/mwaddoups/events{...,https://api.github.com/users/mwaddoups/receive...,User,False


In [None]:
# Self-contained version: fetch the issues, then tabulate the 'user' dict of each one.
res = requests.get('https://api.github.com/repos/pandas-dev/pandas/issues')
# Fail loudly on an HTTP error (e.g. rate limiting) instead of indexing into an error payload.
res.raise_for_status()
js = res.json()
user = [issue['user'] for issue in js]

pd.DataFrame(user)


In [157]:
pd.DataFrame([js[i]["user"] for i in range(len(js))])


Unnamed: 0,login,id,node_id,avatar_url,gravatar_id,url,html_url,followers_url,following_url,gists_url,starred_url,subscriptions_url,organizations_url,repos_url,events_url,received_events_url,type,site_admin
0,jbrockmendel,8078968,MDQ6VXNlcjgwNzg5Njg=,https://avatars.githubusercontent.com/u/807896...,,https://api.github.com/users/jbrockmendel,https://github.com/jbrockmendel,https://api.github.com/users/jbrockmendel/foll...,https://api.github.com/users/jbrockmendel/foll...,https://api.github.com/users/jbrockmendel/gist...,https://api.github.com/users/jbrockmendel/star...,https://api.github.com/users/jbrockmendel/subs...,https://api.github.com/users/jbrockmendel/orgs,https://api.github.com/users/jbrockmendel/repos,https://api.github.com/users/jbrockmendel/even...,https://api.github.com/users/jbrockmendel/rece...,User,False
1,jbrockmendel,8078968,MDQ6VXNlcjgwNzg5Njg=,https://avatars.githubusercontent.com/u/807896...,,https://api.github.com/users/jbrockmendel,https://github.com/jbrockmendel,https://api.github.com/users/jbrockmendel/foll...,https://api.github.com/users/jbrockmendel/foll...,https://api.github.com/users/jbrockmendel/gist...,https://api.github.com/users/jbrockmendel/star...,https://api.github.com/users/jbrockmendel/subs...,https://api.github.com/users/jbrockmendel/orgs,https://api.github.com/users/jbrockmendel/repos,https://api.github.com/users/jbrockmendel/even...,https://api.github.com/users/jbrockmendel/rece...,User,False
2,jbrockmendel,8078968,MDQ6VXNlcjgwNzg5Njg=,https://avatars.githubusercontent.com/u/807896...,,https://api.github.com/users/jbrockmendel,https://github.com/jbrockmendel,https://api.github.com/users/jbrockmendel/foll...,https://api.github.com/users/jbrockmendel/foll...,https://api.github.com/users/jbrockmendel/gist...,https://api.github.com/users/jbrockmendel/star...,https://api.github.com/users/jbrockmendel/subs...,https://api.github.com/users/jbrockmendel/orgs,https://api.github.com/users/jbrockmendel/repos,https://api.github.com/users/jbrockmendel/even...,https://api.github.com/users/jbrockmendel/rece...,User,False
3,rhshadrach,45562402,MDQ6VXNlcjQ1NTYyNDAy,https://avatars.githubusercontent.com/u/455624...,,https://api.github.com/users/rhshadrach,https://github.com/rhshadrach,https://api.github.com/users/rhshadrach/followers,https://api.github.com/users/rhshadrach/follow...,https://api.github.com/users/rhshadrach/gists{...,https://api.github.com/users/rhshadrach/starre...,https://api.github.com/users/rhshadrach/subscr...,https://api.github.com/users/rhshadrach/orgs,https://api.github.com/users/rhshadrach/repos,https://api.github.com/users/rhshadrach/events...,https://api.github.com/users/rhshadrach/receiv...,User,False
4,jbrockmendel,8078968,MDQ6VXNlcjgwNzg5Njg=,https://avatars.githubusercontent.com/u/807896...,,https://api.github.com/users/jbrockmendel,https://github.com/jbrockmendel,https://api.github.com/users/jbrockmendel/foll...,https://api.github.com/users/jbrockmendel/foll...,https://api.github.com/users/jbrockmendel/gist...,https://api.github.com/users/jbrockmendel/star...,https://api.github.com/users/jbrockmendel/subs...,https://api.github.com/users/jbrockmendel/orgs,https://api.github.com/users/jbrockmendel/repos,https://api.github.com/users/jbrockmendel/even...,https://api.github.com/users/jbrockmendel/rece...,User,False
5,architsingh15,15556124,MDQ6VXNlcjE1NTU2MTI0,https://avatars.githubusercontent.com/u/155561...,,https://api.github.com/users/architsingh15,https://github.com/architsingh15,https://api.github.com/users/architsingh15/fol...,https://api.github.com/users/architsingh15/fol...,https://api.github.com/users/architsingh15/gis...,https://api.github.com/users/architsingh15/sta...,https://api.github.com/users/architsingh15/sub...,https://api.github.com/users/architsingh15/orgs,https://api.github.com/users/architsingh15/repos,https://api.github.com/users/architsingh15/eve...,https://api.github.com/users/architsingh15/rec...,User,False
6,fangchenli,7614606,MDQ6VXNlcjc2MTQ2MDY=,https://avatars.githubusercontent.com/u/761460...,,https://api.github.com/users/fangchenli,https://github.com/fangchenli,https://api.github.com/users/fangchenli/followers,https://api.github.com/users/fangchenli/follow...,https://api.github.com/users/fangchenli/gists{...,https://api.github.com/users/fangchenli/starre...,https://api.github.com/users/fangchenli/subscr...,https://api.github.com/users/fangchenli/orgs,https://api.github.com/users/fangchenli/repos,https://api.github.com/users/fangchenli/events...,https://api.github.com/users/fangchenli/receiv...,User,False
7,TheNeuralBit,675055,MDQ6VXNlcjY3NTA1NQ==,https://avatars.githubusercontent.com/u/675055...,,https://api.github.com/users/TheNeuralBit,https://github.com/TheNeuralBit,https://api.github.com/users/TheNeuralBit/foll...,https://api.github.com/users/TheNeuralBit/foll...,https://api.github.com/users/TheNeuralBit/gist...,https://api.github.com/users/TheNeuralBit/star...,https://api.github.com/users/TheNeuralBit/subs...,https://api.github.com/users/TheNeuralBit/orgs,https://api.github.com/users/TheNeuralBit/repos,https://api.github.com/users/TheNeuralBit/even...,https://api.github.com/users/TheNeuralBit/rece...,User,False
8,fangchenli,7614606,MDQ6VXNlcjc2MTQ2MDY=,https://avatars.githubusercontent.com/u/761460...,,https://api.github.com/users/fangchenli,https://github.com/fangchenli,https://api.github.com/users/fangchenli/followers,https://api.github.com/users/fangchenli/follow...,https://api.github.com/users/fangchenli/gists{...,https://api.github.com/users/fangchenli/starre...,https://api.github.com/users/fangchenli/subscr...,https://api.github.com/users/fangchenli/orgs,https://api.github.com/users/fangchenli/repos,https://api.github.com/users/fangchenli/events...,https://api.github.com/users/fangchenli/receiv...,User,False
9,mwaddoups,11441393,MDQ6VXNlcjExNDQxMzkz,https://avatars.githubusercontent.com/u/114413...,,https://api.github.com/users/mwaddoups,https://github.com/mwaddoups,https://api.github.com/users/mwaddoups/followers,https://api.github.com/users/mwaddoups/followi...,https://api.github.com/users/mwaddoups/gists{/...,https://api.github.com/users/mwaddoups/starred...,https://api.github.com/users/mwaddoups/subscri...,https://api.github.com/users/mwaddoups/orgs,https://api.github.com/users/mwaddoups/repos,https://api.github.com/users/mwaddoups/events{...,https://api.github.com/users/mwaddoups/receive...,User,False


In [None]:
# Gather each issue's 'user' record and tabulate them.
l = [issue['user'] for issue in js]

pd.DataFrame(l)


In [24]:
# Let pandas fetch and parse the JSON directly from the URL into a DataFrame.
json_data = pd.read_json("https://api.github.com/repos/pandas-dev/pandas/issues")
# Expand the dicts held in the 'user' column into one column per key.
data = pd.DataFrame.from_records(json_data['user'])
data.head()


Unnamed: 0,login,id,node_id,avatar_url,gravatar_id,url,html_url,followers_url,following_url,gists_url,starred_url,subscriptions_url,organizations_url,repos_url,events_url,received_events_url,type,site_admin
0,michaelshekasta,41590425,MDQ6VXNlcjQxNTkwNDI1,https://avatars.githubusercontent.com/u/415904...,,https://api.github.com/users/michaelshekasta,https://github.com/michaelshekasta,https://api.github.com/users/michaelshekasta/f...,https://api.github.com/users/michaelshekasta/f...,https://api.github.com/users/michaelshekasta/g...,https://api.github.com/users/michaelshekasta/s...,https://api.github.com/users/michaelshekasta/s...,https://api.github.com/users/michaelshekasta/orgs,https://api.github.com/users/michaelshekasta/r...,https://api.github.com/users/michaelshekasta/e...,https://api.github.com/users/michaelshekasta/r...,User,False
1,kerrickstaley,184773,MDQ6VXNlcjE4NDc3Mw==,https://avatars.githubusercontent.com/u/184773...,,https://api.github.com/users/kerrickstaley,https://github.com/kerrickstaley,https://api.github.com/users/kerrickstaley/fol...,https://api.github.com/users/kerrickstaley/fol...,https://api.github.com/users/kerrickstaley/gis...,https://api.github.com/users/kerrickstaley/sta...,https://api.github.com/users/kerrickstaley/sub...,https://api.github.com/users/kerrickstaley/orgs,https://api.github.com/users/kerrickstaley/repos,https://api.github.com/users/kerrickstaley/eve...,https://api.github.com/users/kerrickstaley/rec...,User,False
2,jbrockmendel,8078968,MDQ6VXNlcjgwNzg5Njg=,https://avatars.githubusercontent.com/u/807896...,,https://api.github.com/users/jbrockmendel,https://github.com/jbrockmendel,https://api.github.com/users/jbrockmendel/foll...,https://api.github.com/users/jbrockmendel/foll...,https://api.github.com/users/jbrockmendel/gist...,https://api.github.com/users/jbrockmendel/star...,https://api.github.com/users/jbrockmendel/subs...,https://api.github.com/users/jbrockmendel/orgs,https://api.github.com/users/jbrockmendel/repos,https://api.github.com/users/jbrockmendel/even...,https://api.github.com/users/jbrockmendel/rece...,User,False
3,rok,54589,MDQ6VXNlcjU0NTg5,https://avatars.githubusercontent.com/u/54589?v=4,,https://api.github.com/users/rok,https://github.com/rok,https://api.github.com/users/rok/followers,https://api.github.com/users/rok/following{/ot...,https://api.github.com/users/rok/gists{/gist_id},https://api.github.com/users/rok/starred{/owne...,https://api.github.com/users/rok/subscriptions,https://api.github.com/users/rok/orgs,https://api.github.com/users/rok/repos,https://api.github.com/users/rok/events{/privacy},https://api.github.com/users/rok/received_events,User,False
4,jbrockmendel,8078968,MDQ6VXNlcjgwNzg5Njg=,https://avatars.githubusercontent.com/u/807896...,,https://api.github.com/users/jbrockmendel,https://github.com/jbrockmendel,https://api.github.com/users/jbrockmendel/foll...,https://api.github.com/users/jbrockmendel/foll...,https://api.github.com/users/jbrockmendel/gist...,https://api.github.com/users/jbrockmendel/star...,https://api.github.com/users/jbrockmendel/subs...,https://api.github.com/users/jbrockmendel/orgs,https://api.github.com/users/jbrockmendel/repos,https://api.github.com/users/jbrockmendel/even...,https://api.github.com/users/jbrockmendel/rece...,User,False


## File Formats
![4.PNG](attachment:4.PNG)

# Pickle Format

In [1]:
import pandas as pd

In [2]:
# Load the fruits table from "Fruits.csv" (path is relative to the working directory).
from_file = pd.read_csv("Fruits.csv")

In [3]:
# Serialize the DataFrame to a pickle file named "My_pickle" (written without a file extension).
from_file.to_pickle("My_pickle")

![1.jpg](attachment:1.jpg)

![2.png](attachment:2.png)

![3.jpg](attachment:3.jpg)

In [4]:
# Read the pickled DataFrame back to confirm the round trip.
# Only unpickle files you created or trust: loading a pickle can execute arbitrary code.
pd.read_pickle("My_pickle")

Unnamed: 0,Id,Name,Price,Stock
0,1,Apple,0.4,30
1,2,Avacado,0.5,25
2,3,Banana,0.6,20
3,4,Blackberry,0.4,50
4,5,Cherry,0.5,40
5,6,Grape,0.7,15
6,7,Kiwi,0.6,15
7,8,Lemon,0.4,25
8,9,Mango,0.5,30
9,10,Orange,0.6,10


# .h5 File Format

In [10]:
# Load the fruits table again (same file as in the pickle section) as input for the HDF5 examples.
from_file = pd.read_csv("Fruits.csv")

In [12]:
pip install tables

Collecting tables
  Downloading tables-3.6.1-2-cp36-cp36m-win_amd64.whl (3.2 MB)
Collecting numexpr>=2.6.2
  Downloading numexpr-2.7.3-cp36-cp36m-win_amd64.whl (93 kB)
Installing collected packages: numexpr, tables
Successfully installed numexpr-2.7.3 tables-3.6.1
Note: you may need to restart the kernel to use updated packages.




In [14]:
# Open the HDF5 store file 'HDF.h5'; the handle is reused by the cells that follow.
# NOTE(review): no h5_data.close() is visible in this part of the notebook — confirm the store is closed later.
h5_data = pd.HDFStore('HDF.h5')

In [18]:
# Store the DataFrame in the HDF5 file under the key 'obj1' using dict-style assignment.
h5_data['obj1'] = from_file

In [19]:
# Read the object stored under 'obj1' back from the store.
h5_data['obj1']

Unnamed: 0,Id,Name,Price,Stock
0,1,Apple,0.4,30
1,2,Avacado,0.5,25
2,3,Banana,0.6,20
3,4,Blackberry,0.4,50
4,5,Cherry,0.5,40
5,6,Grape,0.7,15
6,7,Kiwi,0.6,15
7,8,Lemon,0.4,25
8,9,Mango,0.5,30
9,10,Orange,0.6,10


In [20]:
# Store a second copy under 'obj2' in 'table' format; the next cell queries this key with select(where=...).
h5_data.put('obj2' , from_file , format = 'table')

In [21]:
# Query the stored table for the rows whose index lies between 3 and 7 (inclusive).
h5_data.select('obj2' , where = ['index >=3 and index <= 7'])

Unnamed: 0,Id,Name,Price,Stock
3,4,Blackberry,0.4,50
4,5,Cherry,0.5,40
5,6,Grape,0.7,15
6,7,Kiwi,0.6,15
7,8,Lemon,0.4,25


### Pandas Operations

In [22]:
# Load the Titanic passenger dataset directly from its raw GitHub URL (needs network access).
df = pd.read_csv("https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv")

In [23]:
# Preview the first five rows (head() defaults to 5).
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [24]:
# Preview the last five rows.
df.tail()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q


In [26]:
# Show the dtype pandas inferred for every column.
df.dtypes

PassengerId      int64
Survived         int64
Pclass           int64
Name            object
Sex             object
Age            float64
SibSp            int64
Parch            int64
Ticket          object
Fare           float64
Cabin           object
Embarked        object
dtype: object

### By default, describe() summarizes only the columns with numerical data

In [27]:
# Summary statistics; called without arguments, only the numeric columns are summarized (see output).
df.describe()

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
count,891.0,891.0,891.0,714.0,891.0,891.0,891.0
mean,446.0,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208
std,257.353842,0.486592,0.836071,14.526497,1.102743,0.806057,49.693429
min,1.0,0.0,1.0,0.42,0.0,0.0,0.0
25%,223.5,0.0,2.0,20.125,0.0,0.0,7.9104
50%,446.0,0.0,3.0,28.0,0.0,0.0,14.4542
75%,668.5,1.0,3.0,38.0,1.0,0.0,31.0
max,891.0,1.0,3.0,80.0,8.0,6.0,512.3292


### To Get Column Names with datatype == 'object'

In [28]:
# Boolean Series indexed by column name: True where the column dtype is object.
df.dtypes == 'object'

PassengerId    False
Survived       False
Pclass         False
Name            True
Sex             True
Age            False
SibSp          False
Parch          False
Ticket          True
Fare           False
Cabin           True
Embarked        True
dtype: bool

In [29]:
# Filter the dtypes Series with the boolean mask to keep only the object-dtype columns.
df.dtypes[df.dtypes == 'object']

Name        object
Sex         object
Ticket      object
Cabin       object
Embarked    object
dtype: object

![5.PNG](attachment:5.PNG)

In [30]:
# The index of the filtered dtypes Series holds the names of the object-dtype columns.
df.dtypes[df.dtypes == 'object'].index

Index(['Name', 'Sex', 'Ticket', 'Cabin', 'Embarked'], dtype='object')

In [31]:
# Keep only the object-dtype columns. select_dtypes is the built-in shortcut for
# the manual route shown step by step above
# (df[df.dtypes[df.dtypes == 'object'].index]) and selects the same columns.
df1 = df.select_dtypes(include='object')

In [33]:
# On an all-object frame describe() reports count, unique, top and freq instead of numeric statistics.
df1.describe()

Unnamed: 0,Name,Sex,Ticket,Cabin,Embarked
count,891,891,891,204,889
unique,891,2,681,147,3
top,"Gronnestad, Mr. Daniel Danielsen",male,CA. 2343,C23 C25 C27,S
freq,1,577,7,4,644


### Filter out the columns whose data type is float

In [39]:
# List the column dtypes again as the starting point for the float filter.
df.dtypes

PassengerId      int64
Survived         int64
Pclass           int64
Name            object
Sex             object
Age            float64
SibSp            int64
Parch            int64
Ticket          object
Fare           float64
Cabin           object
Embarked        object
dtype: object

In [34]:
# Boolean Series indexed by column name: True where the dtype compares equal to 'float'
# (Age and Fare, both float64, in the recorded output).
df.dtypes == 'float'

PassengerId    False
Survived       False
Pclass         False
Name           False
Sex            False
Age             True
SibSp          False
Parch          False
Ticket         False
Fare            True
Cabin          False
Embarked       False
dtype: bool

In [35]:
# Filter the dtypes Series with the boolean mask to keep only the float columns.
df.dtypes[df.dtypes == 'float']

Age     float64
Fare    float64
dtype: object

In [36]:
# The index of the filtered dtypes Series holds the names of the float columns.
df.dtypes[df.dtypes == 'float'].index

Index(['Age', 'Fare'], dtype='object')

In [38]:
# Preview only the float64 columns. select_dtypes replaces the manual
# df[df.dtypes[df.dtypes == 'float'].index] lookup and picks the same columns
# (Age and Fare for this dataset).
df.select_dtypes(include='float64').head()

Unnamed: 0,Age,Fare
0,22.0,7.25
1,38.0,71.2833
2,26.0,7.925
3,35.0,53.1
4,35.0,8.05


### Select Particular Columns from Data frame

In [40]:
# A list of column names inside [] selects several columns at once and returns a DataFrame.
df[["Age" , "Fare"]].head()

Unnamed: 0,Age,Fare
0,22.0,7.25
1,38.0,71.2833
2,26.0,7.925
3,35.0,53.1
4,35.0,8.05


### Series (single square brackets) vs. DataFrame (double square brackets)

In [42]:
# Single brackets with one column name return a Series.
type(df['Age'])

pandas.core.series.Series

In [43]:
# Double brackets (a one-element list of names) return a DataFrame.
type(df[['Age']])

pandas.core.frame.DataFrame

### Slicing 

In [44]:
# Positional slice of the Age column: rows at positions 1 and 2 (the stop position is excluded).
df['Age'].iloc[1:3]

1    38.0
2    26.0
Name: Age, dtype: float64

In [47]:
# Positional slice with a step: every 5th row from position 0 up to (not including) 25.
df['Age'].iloc[0:25:5]

0     22.0
5      NaN
10     4.0
15    55.0
20    35.0
Name: Age, dtype: float64

### Sorting 

In [49]:
# Python's built-in sorted() consumes the sliced Series and returns a plain list in ascending order.
sorted(df['Age'].iloc[6:25:5])

[2.0, 34.0, 54.0, 58.0]

### Iterate over the columns in Series format (single square brackets)

In [55]:
# Walk over every column name and print the first entry of that column as a Series.
# NOTE(review): the recorded output includes 'newcolumn', which is only created by a
# cell further down (In [53]); on a fresh Restart & Run All it will not be printed here.
for i in df.columns:
    print(df[i].head(1))  # single brackets -> Series

0    1
Name: PassengerId, dtype: int64
0    0
Name: Survived, dtype: int64
0    3
Name: Pclass, dtype: int64
0    Braund, Mr. Owen Harris
Name: Name, dtype: object
0    male
Name: Sex, dtype: object
0    22.0
Name: Age, dtype: float64
0    1
Name: SibSp, dtype: int64
0    0
Name: Parch, dtype: int64
0    A/5 21171
Name: Ticket, dtype: object
0    7.25
Name: Fare, dtype: float64
0    NaN
Name: Cabin, dtype: object
0    S
Name: Embarked, dtype: object
0    This is new Column
Name: newcolumn, dtype: object


### Iterate over the columns in DataFrame format (double square brackets)

In [56]:
# Walk over every column name and print the first row of that column as a one-column DataFrame.
# NOTE(review): the recorded output includes 'newcolumn', which is only created by a
# cell further down (In [53]); on a fresh Restart & Run All it will not be printed here.
for i in df.columns:
    print(df[[i]].head(1))  # double brackets -> DataFrame

   PassengerId
0            1
   Survived
0         0
   Pclass
0       3
                      Name
0  Braund, Mr. Owen Harris
    Sex
0  male
    Age
0  22.0
   SibSp
0      1
   Parch
0      0
      Ticket
0  A/5 21171
   Fare
0  7.25
  Cabin
0   NaN
  Embarked
0        S
            newcolumn
0  This is new Column


### How to add New Column

In [53]:
# Assigning a scalar to a new column name adds the column with that value in every row.
# This mutates df in place, so every later cell sees the extra column.
df["newcolumn"] = "This is new Column"

In [54]:
# Show two rows to confirm the new column is present.
df.head(2)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,newcolumn
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,This is new Column
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,This is new Column


### pandas.Categorical

In [57]:
# Represent Cabin as a Categorical; missing values stay NaN and are not counted as a category.
pd.Categorical(df['Cabin'])

[NaN, 'C85', NaN, 'C123', NaN, ..., NaN, 'B42', NaN, 'C148', NaN]
Length: 891
Categories (147, object): ['A10', 'A14', 'A16', 'A19', ..., 'F38', 'F4', 'G6', 'T']

In [58]:
# Sex has two categories ('female', 'male') in the recorded output.
pd.Categorical(df['Sex'])

['male', 'female', 'female', 'female', 'male', ..., 'male', 'female', 'female', 'male', 'male']
Length: 891
Categories (2, object): ['female', 'male']

In [59]:
# Integer columns can be categorical too: Pclass has the categories 1, 2 and 3.
pd.Categorical(df['Pclass'])

[3, 1, 3, 1, 3, ..., 2, 1, 3, 1, 3]
Length: 891
Categories (3, int64): [1, 2, 3]

### unique()

In [60]:
# Distinct values of Pclass, returned as a NumPy array.
df['Pclass'].unique()

array([3, 1, 2], dtype=int64)

### isnull()

In [61]:
# Boolean Series: True where Pclass is missing.
df['Pclass'].isnull()

0      False
1      False
2      False
3      False
4      False
       ...  
886    False
887    False
888    False
889    False
890    False
Name: Pclass, Length: 891, dtype: bool

In [63]:
# Boolean Series: True where Cabin is missing.
df['Cabin'].isnull()

0       True
1      False
2       True
3      False
4       True
       ...  
886     True
887    False
888     True
889    False
890     True
Name: Cabin, Length: 891, dtype: bool

In [62]:
  # Cabin values of the rows where Cabin is missing. isnull() already returns a
  # boolean mask, so comparing it with True is redundant, and a single .loc call
  # replaces the chained df[...][...] lookup.
  df.loc[df['Cabin'].isnull(), 'Cabin']

0      NaN
2      NaN
4      NaN
5      NaN
7      NaN
      ... 
884    NaN
885    NaN
886    NaN
888    NaN
890    NaN
Name: Cabin, Length: 687, dtype: object

In [64]:
  # Index labels of the rows where Cabin is missing, taken by masking df.index
  # directly with the boolean result of isnull().
  df.index[df['Cabin'].isnull()]

Int64Index([  0,   2,   4,   5,   7,   8,   9,  12,  13,  14,
            ...
            878, 880, 881, 882, 883, 884, 885, 886, 888, 890],
           dtype='int64', length=687)

### To extract rows we use .loc (label-based) and .iloc (position-based)

In [67]:
# First three rows whose Cabin is missing. A boolean mask with .loc selects the
# rows directly. The earlier form passed index *labels* to the positional .iloc,
# which only gives the right rows while the index is the default 0..n-1 range.
df.loc[df['Cabin'].isnull()].head(3)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,newcolumn
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,This is new Column
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,This is new Column
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S,This is new Column


In [69]:
# .iloc with a list of integers selects rows by position (here positions 1, 5 and 7).
df.iloc[[1 , 5 , 7]]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,newcolumn
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,This is new Column
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q,This is new Column
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S,This is new Column


In [81]:
# Series of the Cabin entries that are missing; its index holds the labels of those rows.
# isnull() is already a boolean mask (no "== True" needed) and .loc avoids chained indexing.
a = df.loc[df['Cabin'].isnull(), 'Cabin']


In [82]:
# a.index holds row *labels*, so use the label-based .loc. The positional .iloc
# only returned the same rows because df still has its default 0..n-1 index.
df.loc[a.index].head(2)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,newcolumn
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,This is new Column
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,This is new Column


In [83]:
# Number of rows with a missing Cabin (length of the filtered Series).
len(a)

687

### To get the name of the person who paid the maximum fare
### Hint : 
### 1. Get there step by step
### 2. Filter
### 3. Extract

In [85]:
# Step 1: boolean mask marking the rows that paid the highest fare.
# Series.max() is vectorized and skips NaN; the built-in max() iterates in Python
# and can return a wrong result when the column contains NaN.
df['Fare'] == df['Fare'].max()

0      False
1      False
2      False
3      False
4      False
       ...  
886    False
887    False
888    False
889    False
890    False
Name: Fare, Length: 891, dtype: bool

In [86]:
# Step 2: filter the frame with the mask to get the full rows of the top-fare passengers.
# Series.max() is used instead of the built-in max() because it is vectorized and NaN-safe.
df[df['Fare'] == df['Fare'].max()]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,newcolumn
258,259,1,1,"Ward, Miss. Anna",female,35.0,0,0,PC 17755,512.3292,,C,This is new Column
679,680,1,1,"Cardeza, Mr. Thomas Drake Martinez",male,36.0,0,1,PC 17755,512.3292,B51 B53 B55,C,This is new Column
737,738,1,1,"Lesurer, Mr. Gustave J",male,35.0,0,0,PC 17755,512.3292,B101,C,This is new Column


In [88]:
# Step 3: extract only the Name column for the top-fare rows.
# One .loc[row_mask, column] call replaces the chained df[mask]['Name'] lookup,
# and Series.max() replaces the built-in max() (vectorized, NaN-safe).
df.loc[df['Fare'] == df['Fare'].max(), 'Name']

258                      Ward, Miss. Anna
679    Cardeza, Mr. Thomas Drake Martinez
737                Lesurer, Mr. Gustave J
Name: Name, dtype: object

### To Create a New column as Sum of other two Columns

In [89]:
# Element-wise sum of two columns stored as a new column (mutates df in place).
# Age has missing values (count 714 of 891 in describe()), so those rows get NaN here.
df['new_col'] = df['PassengerId'] + df['Age']

In [90]:
# Show two rows to confirm the summed column is present.
df.head(2)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,newcolumn,new_col
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,This is new Column,23.0
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,This is new Column,40.0


### Task

### Chipotle.tsv Dataset
https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv

### Beer.txt Dataset
https://raw.githubusercontent.com/justmarkham/DAT8/master/data/beer.txt

### US_Crime_Rates_1960_2014.csv DataSet
https://gist.githubusercontent.com/ganeshbabuNN/9ef28b5313d52b0007e8de63c33ff435/raw/f69899e2ad7798bc64692f080272821c84e29a07/US_Crime_Rates_1960_2014.csv

### vincentarelbundock openintro causaldata HMLdiag
https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/datasets.csv

### iNeuron Practice Examples in Pandas
https://drive.google.com/file/d/1etGP85wJWsgqbsh3gX5oxc8dPKLMA1eT/view

In [92]:
# Load the tab-separated Chipotle orders file straight from GitHub (needs network access).
pd.read_table(
    'https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv',
    sep='\t',
)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,$2.39
1,1,1,Izze,[Clementine],$3.39
2,1,1,Nantucket Nectar,[Apple],$3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,$2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98
...,...,...,...,...,...
4617,1833,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Black Beans, Sour ...",$11.75
4618,1833,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Sour Cream, Cheese...",$11.75
4619,1834,1,Chicken Salad Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Pinto...",$11.25
4620,1834,1,Chicken Salad Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Lettu...",$8.75
