###### - Introduction to Pandas
###### - Series & DataFrame
###### - Creation of DataFrames, Exploring Data (Series and DataFrame)
###### - Operations on DataFrames

In [35]:
import pandas as pd
import numpy as np

#### Series

##### - A one-dimensional, array-like structure whose elements are labelled by an index.

In [36]:
# Build a Series from a plain Python list. No index is passed, so the
# elements are labelled 0..5.
s1 = pd.Series(data=[34, 67, 90, 34, 12, 67])
s1

0    34
1    67
2    90
3    34
4    12
5    67
dtype: int64

In [37]:
# s1 carries the default 0..5 index, so label 0 is its first element.
print(s1[0])

34


In [38]:
# start:stop slice — the stop is excluded, giving the elements labelled 0, 1 and 2.
print(s1[0:3])

0    34
1    67
2    90
dtype: int64


In [39]:
# start::step slice — every second element from the start (labels 0, 2 and 4).
print(s1[0::2])

0    34
2    90
4    12
dtype: int64


#### DataFrame

##### - A two-dimensional, table-like structure with labelled rows (index) and labelled columns.

In [40]:
# Calling the constructor with no data yields an empty frame: no columns, no index.
df = pd.DataFrame()

print(df)

Empty DataFrame
Columns: []
Index: []


In [41]:
# Dict-of-lists construction: each key becomes a column label and each list
# supplies that column's values, one per row.
df = pd.DataFrame(
    data={
        "name": ["vansh", "abhay", "dhruvi"],
        "age": [20, 18, 22],
        "marks": [80, 98, 94],
    }
)

df

Unnamed: 0,name,age,marks
0,vansh,20,80
1,abhay,18,98
2,dhruvi,22,94


In [42]:
# Indexing with a single column label returns that column as a Series.
df["name"]

0     vansh
1     abhay
2    dhruvi
Name: name, dtype: object

In [43]:
# Indexing with a list of labels returns a DataFrame holding just those columns.
df[["name", "marks"]]

Unnamed: 0,name,marks
0,vansh,80
1,abhay,98
2,dhruvi,94


In [44]:
# Read a single cell with one .loc[row_label, column_label] lookup rather than
# chaining two indexing steps (column first, then row).
df.loc[0, "name"]

'vansh'

In [45]:
# Two-column selection again: a list of labels yields a DataFrame, not a Series.
df[["name", "marks"]]

Unnamed: 0,name,marks
0,vansh,80
1,abhay,98
2,dhruvi,94


In [46]:
# The 'marks' column on its own: a Series of int64 values.
df['marks']

0    80
1    98
2    94
Name: marks, dtype: int64

In [47]:
# Display the whole frame: three rows with the name, age and marks columns.
df

Unnamed: 0,name,age,marks
0,vansh,20,80
1,abhay,18,98
2,dhruvi,22,94


In [48]:
# Assigning a list to a label that does not exist yet adds it as a new column;
# the list supplies one value per row.
df["city"] = ["delhi", "mumbai", "pune"]
df

Unnamed: 0,name,age,marks,city
0,vansh,20,80,delhi
1,abhay,18,98,mumbai
2,dhruvi,22,94,pune


In [49]:
# Assigning to a label that already exists replaces that column's values.
df["city"] = ["surat", "baroda", "ahmedabad"]
df

Unnamed: 0,name,age,marks,city
0,vansh,20,80,surat
1,abhay,18,98,baroda
2,dhruvi,22,94,ahmedabad


In [50]:
# print() emits the frame's plain-text representation rather than the
# notebook's table rendering.
print(df)

     name  age  marks       city
0   vansh   20     80      surat
1   abhay   18     98     baroda
2  dhruvi   22     94  ahmedabad


In [51]:
# dtype of a single column: 'age' holds int64 values.
df['age'].dtypes

dtype('int64')

In [52]:
# On the frame, .dtypes lists the dtype of every column; the text columns
# (name, city) are reported as object.
df.dtypes

name     object
age       int64
marks     int64
city     object
dtype: object

In [53]:
# df was built with pd.DataFrame, so its type is the pandas DataFrame class.
type(df)

pandas.core.frame.DataFrame

In [54]:
# s1 was built with pd.Series, so its type is the pandas Series class.
type(s1)

pandas.core.series.Series

In [55]:
# Baseline: the 'marks' column before applying arithmetic to it.
df['marks']

0    80
1    98
2    94
Name: marks, dtype: int64

In [56]:
# Adding a scalar applies to every element and produces a new Series.
# The result is not assigned, so df itself is left as it was.
df['marks'] + 2

0     82
1    100
2     96
Name: marks, dtype: int64

In [57]:
# Element-wise subtraction of a scalar; again only a new Series is returned.
df['marks'] - 2

0    78
1    96
2    92
Name: marks, dtype: int64

In [58]:
# Element-wise multiplication by a scalar.
df['marks'] * 2

0    160
1    196
2    188
Name: marks, dtype: int64

In [59]:
# Element-wise division: the result dtype is float64 even though every mark
# divides evenly.
df['marks'] / 2

0    40.0
1    49.0
2    47.0
Name: marks, dtype: float64

In [60]:
# Element-wise remainder: every mark is even, so each result is 0.
df['marks'] % 2

0    0
1    0
2    0
Name: marks, dtype: int64

In [61]:
# None of the arithmetic results were assigned back, so df still holds the
# original marks.
df

Unnamed: 0,name,age,marks,city
0,vansh,20,80,surat
1,abhay,18,98,baroda
2,dhruvi,22,94,ahmedabad


In [62]:
# Persist the change by assigning the computed Series back to the column.
# NOTE(review): this cell reads and overwrites the same column, so it is not
# idempotent — every re-run adds another 2 to 'marks'.
df['marks'] = df['marks'] + 2

In [63]:
# Confirm the assignment: each mark is now 2 higher than before.
df

Unnamed: 0,name,age,marks,city
0,vansh,20,82,surat
1,abhay,18,100,baroda
2,dhruvi,22,96,ahmedabad


In [64]:
# A single column taken from a DataFrame is a Series.
type(df['marks'])

pandas.core.series.Series

In [None]:
# List-of-lists construction: each inner list becomes one row. Neither index=
# nor columns= is passed, so no explicit labels are supplied.
# NOTE(review): the inner lists hold one field each (names, ages, marks), so
# here a row is a field and a column is a person.
df2 = pd.DataFrame([
    ["aryan", "vansh", "dhruvi"],
    [20, 22, 18],
    [98, 78, 86]
])

df2

In [None]:
# Same field-per-row data, with index= labelling the three rows as
# name / age / marks. The columns are left unnamed; each one is a person.
df3 = pd.DataFrame([
    ["aryan", "vansh", "dhruvi"],
    [20, 22, 18],
    [98, 78, 86]
], index=["name", "age", "marks"])

df3

In [None]:
# columns= names the fields inside each row, so every inner list must be one
# record in (name, age, marks) order. Passing the data field-by-field (all
# names, then all ages, then all marks) would mislabel it: "aryan", 20 and 98
# would end up together under the "name" column.
df3 = pd.DataFrame([
    ["aryan", 20, 98],
    ["vansh", 22, 78],
    ["dhruvi", 18, 86],
], columns=["name", "age", "marks"])

df3

In [None]:
# type() reports the container class only; to inspect the per-column dtypes
# instead, evaluate df3.dtypes.
type(df3)

In [None]:
# Select the 'name' column of df3 by label and check the type of the result.
type(df3['name'])

In [None]:
# Selecting rows by integer position (iloc) and by index label (loc)

In [66]:
# Row-oriented records: every inner list is one person as (Name, Age, City).
dd = [
    ["Alice", 25, "New York"],
    ["Bob", 30, "London"],
    ["Charlie", 22, "Paris"],
]

# columns= assigns a label to each position within the records.
df2 = pd.DataFrame(data=dd, columns=["Name", "Age", "City"])

df2

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,London
2,Charlie,22,Paris


In [None]:
# Bracket access: select the "Name" column by its label.
df2["Name"]

In [67]:
# Attribute-style access: the column label is used as an attribute name and
# the "Name" column comes back as a Series.
df2.Name

0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object

In [None]:
# Read a single cell with one .loc[row_label, column_label] lookup rather than
# chaining attribute access with a second indexing step.
df2.loc[0, "Name"]

In [None]:
# Show df2 in full as a reference for the positional (iloc) selections.
df2

In [None]:
# iloc selects by integer position: position 0 is the first row.
df2.iloc[0]

In [None]:
# Positional slice 0:2 — the rows at positions 0 and 1 (the stop is excluded).
df2.iloc[0:2]

In [None]:
# Positional slice with a step of 2 — the rows at positions 0 and 2.
df2.iloc[0::2]

In [None]:
# Slice rows and columns together: the first two rows of the first two columns.
df2.iloc[0:2, 0:2]

In [None]:
# Show df2 in full as a reference for the label-based (loc) selections.
df2

In [None]:
# loc selects by index label. df2 was built without an explicit index, so its
# labels are the default integers and 0 is a label here, not a position.
df2.loc[0]

In [None]:
# Use an existing column as the row index. The frame returned by set_index is
# stored in df3, replacing whatever df3 referred to before.

df3 = df2.set_index("Name")

In [None]:
# Display the frame that is now indexed by "Name".
df3

In [None]:
# With names as the index, loc looks a row up by its name label.
df3.loc["Bob"]

In [None]:
# A list of labels selects several rows at once.
df3.loc[["Alice", "Charlie"]]

In [None]:
# Label-based slice. Unlike positional slicing, a .loc slice includes its end
# label, so the "Bob" row is part of the result.
df3.loc["Alice":"Bob"]

In [64]:
# Boolean mask: keep only the rows whose City is not "London".
df3.loc[df3["City"] != "London"]

Unnamed: 0_level_0,Age,City
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Alice,25,New York
Charlie,22,Paris


In [None]:
# Convert a list of numeric strings into a list of integers.
l = ['23','34']

nl = list(map(int, l))

nl