In [2]:
import pandas as pd

## Panda Series


In [3]:
# Panda Series
marks = [50, 45, 49, 47]
subjects = ["maths", "english", "science", "history"]
data = {"maths": 50, "science": 49, "english": 45}
pd.Series(marks)
mark_series = pd.Series(data, index=subjects)
mark_series

maths      50.0
english    45.0
science    49.0
history     NaN
dtype: float64

In [4]:
## Checking Null values
mark_series.isnull()
mark_series.notnull()

maths       True
english     True
science     True
history    False
dtype: bool

In [5]:
## Filtering
mark_series[mark_series>45]

maths      50.0
science    49.0
dtype: float64

In [6]:
## Assignment and equivalency
mark_series['history'] = 47
mark_series['history'] == 47
mark_series.history

47.0

In [7]:
## Sorting
tmp = ('a', 'j', 'e', 'k', 'i')
pd.Series(tmp).sort_values()
mark_series.sort_values(ascending=False)

maths      50.0
science    49.0
history    47.0
english    45.0
dtype: float64

In [8]:
## Ranks
mark_series.rank(ascending=False) # ~ to index of sorted Series

maths      1.0
english    4.0
science    2.0
history    3.0
dtype: float64

## Panda DataFrames


In [9]:
## initialize I: Dict method
data = {
    "subjects": ["physics", "maths", "chemistry"],
    "marks": [67,69, 68],
    "Grade": ['A', 'S', 'A']
}
df = pd.DataFrame(data)
df

Unnamed: 0,subjects,marks,Grade
0,physics,67,A
1,maths,69,S
2,chemistry,68,A


In [10]:
## Initialize II: Explicit indexes and columns
data = [
    ["phys", "chem", "math"],
    [98, 97, ],
    ['A', 'B', 'C']
]
df2 = pd.DataFrame(data, index=['I', 'II', 'III'], columns=['subj', 'mark', 'grade'])
df2

Unnamed: 0,subj,mark,grade
I,phys,chem,math
II,98,97,
III,A,B,C


In [11]:
## Transpose
# df.T
df2.T

Unnamed: 0,I,II,III
subj,phys,98.0,A
mark,chem,97.0,B
grade,math,,C


In [12]:
## csv files
data = pd.read_csv("Downloads/example.csv")
data
data.head()
data.tail()

Unnamed: 0,Age,Weight (in kg),Height (in m)
18,67,78,1.85
19,26,65,1.21
20,68,50,1.32
21,56,76,1.69
22,67,78,1.85


In [13]:
## Dimensions, data & info
data.shape
data.info
data.dtypes

Age                 int64
Weight (in kg)      int64
Height (in m)     float64
dtype: object

In [29]:
## Adding New Col [BMI] and row [23]
data["bmi"] = data["Weight (in kg)"]/data["Height (in m)"] **2
data.loc[23] = [67, 78, 1.85, 22.78]
data.tail()

Unnamed: 0,Age,Weight (in kg),Height (in m),bmi
19,26.0,65.0,1.21,44.395875
20,68.0,50.0,1.32,28.696051
21,56.0,76.0,1.69,26.609713
22,67.0,78.0,1.85,22.790358
23,67.0,78.0,1.85,22.78


In [27]:
## loc [labels]
data.loc[0:10, ['Weight (in kg)', 'Height (in m)']]

Unnamed: 0,Weight (in kg),Height (in m)
0,60.0,1.35
1,43.0,1.21
2,78.0,1.5
3,65.0,1.21
4,50.0,1.32
5,43.0,1.52
6,32.0,1.65
7,34.0,1.61
8,23.0,1.24
9,21.0,1.52


In [28]:
## iloc [int positions]
data.iloc[1:10, 0:3]

Unnamed: 0,Age,Weight (in kg),Height (in m)
1,12.0,43.0,1.21
2,54.0,78.0,1.5
3,26.0,65.0,1.21
4,68.0,50.0,1.32
5,21.0,43.0,1.52
6,10.0,32.0,1.65
7,57.0,34.0,1.61
8,75.0,23.0,1.24
9,32.0,21.0,1.52


In [35]:
### df2 eg
df2.loc[['II', 'III'], ['subj', 'mark']]
df2.iloc[[1,2], :]

Unnamed: 0,subj,mark,grade
II,98,97,
III,A,B,C


In [46]:
## Sort
data.sort_values('Age').head()
# data['Age'].sort_values()

Unnamed: 0,Age,Weight (in kg),Height (in m),bmi
6,10.0,32.0,1.65,11.753903
1,12.0,43.0,1.21,29.369579
5,21.0,43.0,1.52,18.611496
13,23.0,45.0,1.75,14.693878
10,23.0,53.0,1.5,23.555556


In [59]:
## Rank
# dense groups together ranks while min have distinct ranks even for same value
data['bmi_rank'] = data['bmi'].rank(method='dense')
data.head()

Unnamed: 0,Age,Weight (in kg),Height (in m),bmi,bmi_rank
0,45.0,60.0,1.35,32.921811,16.0
1,12.0,43.0,1.21,29.369579,14.0
2,54.0,78.0,1.5,34.666667,17.0
3,26.0,65.0,1.21,44.395875,18.0
4,68.0,50.0,1.32,28.696051,13.0


In [68]:
## isnull() notnull()
data = pd.read_csv('Downloads/example_missingdata.csv')
data.isnull().sum()

Age               1
Weight (in kg)    2
Height (in m)     1
dtype: int64