# Pandas Tutorial
pandas is an open-source, BSD-licensed library providing high-performance, easy-to-use data structures and data analysis tools for the Python programming language.

Agenda
- What is a DataFrame?
- What is a Series?
- Different operations in pandas.

In [1]:
# first step to import pandas
import pandas as pd
import numpy as np

In [7]:
## Playing with DataFrame
# Build a 5x4 frame holding 0..19 in row-major order, labelled Row1..Row5 and Column1..Column4.
df = pd.DataFrame(
    data=np.arange(20).reshape(5, 4),
    index=[f"Row{n}" for n in range(1, 6)],
    columns=[f"Column{n}" for n in range(1, 5)],
)

In [8]:
df

Unnamed: 0,Column1,Column2,Column3,Column4
Row1,0,1,2,3
Row2,4,5,6,7
Row3,8,9,10,11
Row4,12,13,14,15
Row5,16,17,18,19


In [9]:
# Save the frame to Test.csv, a path relative to the current working directory.
df.to_csv("Test.csv")

In [14]:
# Accessing the elements
## 1. .loc[]  -> select by label      2. .iloc[] -> select by integer position
# Selecting a single row label with .loc returns that row as a Series.
type(df.loc["Row1"])

pandas.core.series.Series

In [15]:
df.loc["Row1"]

Column1    0
Column2    1
Column3    2
Column4    3
Name: Row1, dtype: int32

In [18]:
df.iloc[:,:]

Unnamed: 0,Column1,Column2,Column3,Column4
Row1,0,1,2,3
Row2,4,5,6,7
Row3,8,9,10,11
Row4,12,13,14,15
Row5,16,17,18,19


In [19]:
# Positional slice: the first three rows and the first two columns (end positions are excluded).
df.iloc[:3,:2]

Unnamed: 0,Column1,Column2
Row1,0,1
Row2,4,5
Row3,8,9


In [20]:
type(df.iloc[:3,:2])

pandas.core.frame.DataFrame

In [22]:
#Convert DataFrame into array
# Take every column except the first (by position) and return the values as a NumPy array.
# to_numpy() is the accessor the pandas documentation recommends in place of .values.
df.iloc[:,1:].to_numpy()

array([[ 1,  2,  3],
       [ 5,  6,  7],
       [ 9, 10, 11],
       [13, 14, 15],
       [17, 18, 19]])

In [23]:
# Shape, as (rows, columns), of the array built from every column except the first.
# to_numpy() is the accessor the pandas documentation recommends in place of .values.
df.iloc[:,1:].to_numpy().shape

(5, 3)

In [25]:
# Count the missing values in each column: isnull() flags them, sum() totals the flags per column.
df.isnull().sum()

Column1    0
Column2    0
Column3    0
Column4    0
dtype: int64

In [28]:
# Tally how many times each distinct value occurs in Column1.
df["Column1"].value_counts()

12    1
4     1
16    1
8     1
0     1
Name: Column1, dtype: int64

In [30]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, Row1 to Row5
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype
---  ------   --------------  -----
 0   Column1  5 non-null      int32
 1   Column2  5 non-null      int32
 2   Column3  5 non-null      int32
 3   Column4  5 non-null      int32
dtypes: int32(4)
memory usage: 280.0+ bytes


In [34]:
df["Column2"].unique()

array([ 1,  5,  9, 13, 17])

In [33]:
# A list of column names inside the brackets selects several columns at once and yields a DataFrame.
df[["Column2","Column3"]]

Unnamed: 0,Column2,Column3
Row1,1,2
Row2,5,6
Row3,9,10
Row4,13,14
Row5,17,18


In [37]:
# reading the dataset
# Load the workbook Car_Produce.xlsx into a DataFrame.
# NOTE: this rebinds `df`, so the frame built earlier in the notebook is no longer reachable by that name.
df=pd.read_excel("Car_Produce.xlsx")

In [38]:
df.head()

Unnamed: 0,ID Number,Cars Produce,Year
0,A,5000,2014
1,B,3000,2015
2,C,3500,2016
3,D,2800,2017
4,E,3900,2018


In [39]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   ID Number     7 non-null      object
 1   Cars Produce  7 non-null      int64 
 2   Year          7 non-null      int64 
dtypes: int64(2), object(1)
memory usage: 296.0+ bytes


In [41]:
# describe() reports summary statistics for the numeric columns only;
# the object-typed "ID Number" column does not appear in the result.
df.describe()

Unnamed: 0,Cars Produce,Year
count,7.0,7.0
mean,4171.428571,2017.0
std,1290.62554,2.160247
min,2800.0,2014.0
25%,3250.0,2015.5
50%,3900.0,2017.0
75%,4750.0,2018.5
max,6500.0,2020.0


In [43]:
df.shape

(7, 3)

In [44]:
from io import StringIO,BytesIO

In [45]:
# CSV text with a header row and three records, written one row per list entry
# and joined with newlines (no trailing newline).
data = "\n".join([
    "col1,col2,col3",
    "x,y,1",
    "a,b,2",
    "c,d,3",
])

In [46]:
data

'col1,col2,col3\nx,y,1\na,b,2\nc,d,3'

In [47]:
type(data)

str

In [50]:
pd.read_csv(StringIO(data))

Unnamed: 0,col1,col2,col3
0,x,y,1
1,a,b,2
2,c,d,3


In [51]:
# usecols accepts a callable that is evaluated against each column name; here a column
# is kept when its upper-cased name is COL1 or COL2, so the match ignores case.
df = pd.read_csv(
    StringIO(data),
    usecols=lambda name: name.upper() in ("COL1", "COL2"),
)

In [52]:
# Write the current `df` to Test1.csv; the index is written too because index=False is not passed.
df.to_csv("Test1.csv")

In [54]:
# CSV text: header a,b,c,d followed by three all-integer rows, joined with newlines
# (no trailing newline).
data = "\n".join((
    "a,b,c,d",
    "1,2,3,4",
    "5,6,7,8",
    "9,8,7,5",
))

In [55]:
print(data)

a,b,c,d
1,2,3,4
5,6,7,8
9,8,7,5


In [63]:
# Parse the CSV text again, this time forcing every column to dtype object.
df1 = pd.read_csv(filepath_or_buffer=StringIO(data), dtype=object)

In [64]:
df1

Unnamed: 0,a,b,c,d
0,1,2,3,4
1,5,6,7,8
2,9,8,7,5


In [69]:
df1["a"]

0    1
1    5
2    9
Name: a, dtype: object

In [77]:
# Per-column dtypes: "a" uses pandas' nullable Int64, "b" the built-in int, "c" float.
# "d" is not listed, so its dtype is left to pandas' inference.
df1 = pd.read_csv(
    StringIO(data),
    dtype={"a": "Int64", "b": int, "c": float},
)

In [79]:
df1

Unnamed: 0,a,b,c,d
0,1,2,3.0,4
1,5,6,7.0,8
2,9,8,7.0,5


In [83]:
type(df1["a"][2])

numpy.int64

In [86]:
#Checking the dataType
df1.dtypes

a      Int64
b      int32
c    float64
d      int64
dtype: object

## Read JSON to CSV

In [90]:
# One employee record as JSON text; "job_profile" holds a one-element list containing a dict of titles.
data='{"Employee_name":"james","email":"123@gmail.com","job_profile":[{"title1":"team Lead","title2":"Sr. Developer"}]}'
# Wrap the text in StringIO before parsing: passing a literal JSON string straight to
# read_json is deprecated (pandas 2.1+), while a file-like object is always accepted.
df2=pd.read_json(StringIO(data))

In [91]:
df2

Unnamed: 0,Employee_name,email,job_profile
0,james,123@gmail.com,"{'title1': 'team Lead', 'title2': 'Sr. Develop..."


In [93]:
df2.to_json()

'{"Employee_name":{"0":"james"},"email":{"0":"123@gmail.com"},"job_profile":{"0":{"title1":"team Lead","title2":"Sr. Developer"}}}'

In [94]:
# orient="records" serialises the frame as a JSON list holding one object per row.
df2.to_json(orient="records")

'[{"Employee_name":"james","email":"123@gmail.com","job_profile":{"title1":"team Lead","title2":"Sr. Developer"}}]'