In [2]:
# Working with Pandas dataframes
import pandas as pd

In [3]:
import numpy as np

In [4]:
li = [[100, "ryan", 100000.5],
      [101, "Mike", 20000.5],
      [102, 'Jonny', 25000.6]]
arr2d = np.array(li)

In [5]:
arr2d

array([['100', 'ryan', '100000.5'],
       ['101', 'Mike', '20000.5'],
       ['102', 'Jonny', '25000.6']], dtype='<U32')

In [6]:
print(arr2d[:,0])

['100' '101' '102']


In [6]:
print(arr2d[:,1])

['ryan' 'Mike' 'Jonny']


In [7]:
print(arr2d[:,2])

['100000.5' '20000.5' '25000.6']


In [7]:
#  Now lets convert this list to a data frame instead of an array
df = pd.DataFrame(li)

In [8]:
df

Unnamed: 0,0,1,2
0,100,ryan,100000.5
1,101,Mike,20000.5
2,102,Jonny,25000.6


In [14]:
# change the column names
df = pd.DataFrame(li, columns=['emp_no', 'name', 'salary'])

In [13]:
df

Unnamed: 0,emp_no,name,salary
0,100,ryan,100000.5
1,101,Mike,20000.5
2,102,Jonny,25000.6


In [10]:
# change the row names
df = pd.DataFrame(li, index=['row1', 'row2', 'row3'])

In [12]:
df

Unnamed: 0,0,1,2
row1,100,ryan,100000.5
row2,101,Mike,20000.5
row3,102,Jonny,25000.6


In [15]:
# Slice a specific column from the dataframe
df.emp_no

0    100
1    101
2    102
Name: emp_no, dtype: int64

In [16]:
# or alternatively, do it this way
df['emp_no']

0    100
1    101
2    102
Name: emp_no, dtype: int64

In [17]:
# assign data frame column slice to a variable
emp_no = df['emp_no']

In [18]:
emp_no

0    100
1    101
2    102
Name: emp_no, dtype: int64

In [19]:
# check datatype of data frame
type(emp_no)

pandas.core.series.Series

In [20]:
# check data type 
# O means string in pandas when you ask for data type
df.emp_no.dtype

dtype('int64')

In [26]:
# Using Dictionary 
dic = {'emp_no':[100,101,102], 'emp_name':['Ryan', 'Mike', 'Johnny'], 'emp_sal':[1000.5, 2000.5, 30000.6]}
       

In [28]:
dic

{'emp_no': [100, 101, 102],
 'emp_name': ['Ryan', 'Mike', 'Johnny'],
 'emp_sal': [1000.5, 2000.5, 30000.6]}

In [29]:
df = pd.DataFrame(dic)

In [30]:
df

Unnamed: 0,emp_no,emp_name,emp_sal
0,100,Ryan,1000.5
1,101,Mike,2000.5
2,102,Johnny,30000.6


In [31]:
# Creating data frames from a file
# ====> From clip board first

In [38]:
df = pd.read_clipboard()

In [39]:
df

Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
1,5.1,3.5,1.4,0.2,setosa
2,4.9,3.0,1.4,0.2,setosa
3,4.7,3.2,1.3,0.2,setosa
4,4.6,3.1,1.5,0.2,setosa
5,5.0,3.6,1.4,0.2,setosa
6,5.4,3.9,1.7,0.4,setosa
7,4.6,3.4,1.4,0.3,setosa
8,5.0,3.4,1.5,0.2,setosa
9,4.4,2.9,1.4,0.2,setosa
10,4.9,3.1,1.5,0.1,setosa


In [41]:
# Creating data frames from a CSV file
df = pd.read_csv('dataset/iris.csv')

In [42]:
df

Unnamed: 0.1,Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
0,1,5.1,3.5,1.4,0.2,setosa
1,2,4.9,3.0,1.4,0.2,setosa
2,3,4.7,3.2,1.3,0.2,setosa
3,4,4.6,3.1,1.5,0.2,setosa
4,5,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,virginica
146,147,6.3,2.5,5.0,1.9,virginica
147,148,6.5,3.0,5.2,2.0,virginica
148,149,6.2,3.4,5.4,2.3,virginica


In [46]:
pip install lxml

Collecting lxml
  Downloading lxml-4.9.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (6.9 MB)
[K     |████████████████████████████████| 6.9 MB 5.1 MB/s eta 0:00:01
[?25hInstalling collected packages: lxml
Successfully installed lxml-4.9.0
Note: you may need to restart the kernel to use updated packages.


In [16]:
df = pd.read_html('dataset/index.html')

In [17]:
df

[   Rank                Name Total LBS Lost Total % Lost
 0     1        Mike Botelho           29.8       11.71%
 1     2      Paul Hutchison           24.5        9.88%
 2     3       Greg Wallerus           14.4        7.00%
 3     4     Frank Venturini           12.0        5.79%
 4     5  Christian Granados           10.3        4.70%
 5     6          Hugo Silva            3.4        1.74%
 6     7       Rob Simonetti            2.0        0.97%
 7     8       Ryan O'Connor          (3.0)      (1.54)%]

In [20]:
df[0]


Unnamed: 0,Rank,Name,Total LBS Lost,Total % Lost
0,1,Mike Botelho,29.8,11.71%
1,2,Paul Hutchison,24.5,9.88%
2,3,Greg Wallerus,14.4,7.00%
3,4,Frank Venturini,12.0,5.79%
4,5,Christian Granados,10.3,4.70%
5,6,Hugo Silva,3.4,1.74%
6,7,Rob Simonetti,2.0,0.97%
7,8,Ryan O'Connor,(3.0),(1.54)%
