# Pandas

For data manipulation and analysis. Two primary data-structures.
- Series - One-Dimensional Array like object, can hold any type of data like a column in a table.
- Dataframe - Two-Dimensional, size-mutable, heterogenous tabular data structure with labeled axes.

In [9]:
import pandas as pd

### 1. Series

In [10]:
# Basic
data = [1,2,3,4,5]                      # Can use tuples too but NOT SET...

series = pd.Series(data)
print(series)
print(type(series))

0    1
1    2
2    3
3    4
4    5
dtype: int64
<class 'pandas.core.series.Series'>


In [11]:
# Custom indexing
data = {'a' : 1, 'b': 2, 'c': 3}

series = pd.Series(data)
print(series, '\n')


# OR we can do like this
data = [1,2,3,4,5]
idx = ['a','b','c','d','e']

series = pd.Series(data, index = idx)
print(series, '\n')


# But if I do like this
data = {'a': 1, 'b': 2}
idx = ['d', 'e']
# idx = ['a', 'e']          # a    1.0
                            # e    NaN
                            # dtype: float64

series = pd.Series(data, index = idx)
print(series)


a    1
b    2
c    3
dtype: int64 

a    1
b    2
c    3
d    4
e    5
dtype: int64 

d   NaN
e   NaN
dtype: float64


### 2. Dataframes

In [12]:
# Basic
data = {'Name': ['Sri', 'Riz', 'Sam', 'Dik'],
        'Age': [21, 21, 22, 19],
        'Marks': [93, 98, 97, 99]
        }

df = pd.DataFrame(data)
print(df)
print(type(df), '\n')

print(df['Age'][1])

df

  Name  Age  Marks
0  Sri   21     93
1  Riz   21     98
2  Sam   22     97
3  Dik   19     99
<class 'pandas.core.frame.DataFrame'> 

21


Unnamed: 0,Name,Age,Marks
0,Sri,21,93
1,Riz,21,98
2,Sam,22,97
3,Dik,19,99


In [13]:
# Convert into numpy array
import numpy as np

arr = np.array(df)
arr

array([['Sri', 21, 93],
       ['Riz', 21, 98],
       ['Sam', 22, 97],
       ['Dik', 19, 99]], dtype=object)

In [14]:
# Alternate ways of creation

data = [                            # Using list of dicts...
    {'Name': 'sri', 'Age': 21},
    {'Name': 'Riz', "Age": 20},
    {'Name': 'Sam', 'Age': 21},
    {'Name': 'Dik'}
]

idx = ['a','b','c','d']             # Custom Indexing...

df = pd.DataFrame(data, index=idx)
df

Unnamed: 0,Name,Age
a,sri,21.0
b,Riz,20.0
c,Sam,21.0
d,Dik,


In [15]:
# Accessing elements

print(df['Name'])
print(type(df['Name']), '\n')

print(df.loc['a'])                # Accessing by rows...
print(type(df.loc['a']), '\n')

print(df.iloc[0], '\n')           # Accessing by index...

# Using at method...
print(df.at['a', 'Name'],'\n')
print(df.iat[0,1])

# Modifying
df.iat[0,0] = 'Sharukh'
df


a    sri
b    Riz
c    Sam
d    Dik
Name: Name, dtype: object
<class 'pandas.core.series.Series'> 

Name     sri
Age     21.0
Name: a, dtype: object
<class 'pandas.core.series.Series'> 

Name     sri
Age     21.0
Name: a, dtype: object 

sri 

21.0


Unnamed: 0,Name,Age
a,Sharukh,21.0
b,Riz,20.0
c,Sam,21.0
d,Dik,


### 3. Some Manipulations

In [None]:
# Adding column...
df['Salary'] = [1000, 2000, 3000, 4000]
df

Unnamed: 0,Name,Age,Salary
a,Sharukh,21.0,1000
b,Riz,20.0,2000
c,Sam,21.0,3000
d,Dik,,4000


In [17]:
# Removing Column...
df.drop('Salary', axis=1)  # This only do Temporary Change. For permanent - use "df.drop('Salary', axis=1, inplace = True)"

Unnamed: 0,Name,Age
a,Sharukh,21.0
b,Riz,20.0
c,Sam,21.0
d,Dik,


In [18]:
df.drop('a')
# For default indexing use df.drop(0)

Unnamed: 0,Name,Age,Salary
b,Riz,20.0,2000
c,Sam,21.0,3000
d,Dik,,4000


In [19]:
df

Unnamed: 0,Name,Age,Salary
a,Sharukh,21.0,1000
b,Riz,20.0,2000
c,Sam,21.0,3000
d,Dik,,4000


In [20]:
df.describe()

Unnamed: 0,Age,Salary
count,3.0,4.0
mean,20.666667,2500.0
std,0.57735,1290.994449
min,20.0,1000.0
25%,20.5,1750.0
50%,21.0,2500.0
75%,21.0,3250.0
max,21.0,4000.0


### # Common Errors

In [21]:
pd.Series({1,2,3,4})                    # SET cannot be used
df.drop("Salary")                       # Default axis is 0 (row) for column axis is 1

TypeError: 'set' type is unordered