# Pandas 0 Tutorial

### Importing libraries

Use *pip install pandas* or *conda install pandas*

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

### Basic Syntax

#### Series

A Series is essentially a single column of a table. It can have a name and a custom index.

In [2]:
# Build a named Series from a plain Python list.
# No index is given, so the rows are labelled 0, 1, 2.
series = pd.Series(data=[20, 21, 300], name="numbers")
series

0     20
1     21
2    300
Name: numbers, dtype: int64

It can be indexed and sliced like a list in Python

In [3]:
# The index labels here are 0, 1, 2, so label 0 is also the first element.
series[0]

20

In [4]:
series[:2]  # a slice returns a Series holding the first two elements

0    20
1    21
Name: numbers, dtype: int64

#### The dataframe

In [5]:
# A dictionary maps each column name to the list of values in that column.
data = {
    "a": [1, 2, 3],
    "b": [4, 5, 6],
    "c": [7, 8, 9],
}
df = pd.DataFrame(data)
df

Unnamed: 0,a,b,c
0,1,4,7
1,2,5,8
2,3,6,9


Combining data along columns

In [6]:
# axis="columns" (the same as axis=1) concatenates along the columns:
# the Series and the DataFrame are placed side by side, matched on index label.
col_df = pd.concat([series, df], axis="columns")
col_df

Unnamed: 0,numbers,a,b,c
0,20,1,4,7
1,21,2,5,8
2,300,3,6,9


In [None]:
# A one-row DataFrame with the same column names as df.
new_row = pd.DataFrame({"a": [13], "b": [14], "c": [15]})
new_row

Unnamed: 0,a,b,c
0,13,14,15


Combining data along rows

In [None]:
# axis="index" (the same as axis=0) concatenates along the rows:
# new_row is stacked underneath df and keeps its own index label.
row_df = pd.concat([df, new_row], axis="index")
row_df

Unnamed: 0,a,b,c
0,1,4,7
1,2,5,8
2,3,6,9
0,13,14,15


Reset index

In [None]:
# Renumber the rows 0..n-1. drop=True discards the old index labels instead of
# keeping them as an extra column. The result is assigned back to row_df
# rather than using inplace=True, which is the recommended pandas style.
row_df = row_df.reset_index(drop=True)
row_df

Unnamed: 0,a,b,c
0,1,4,7
1,2,5,8
2,3,6,9
3,13,14,15


Adding new columns

In [None]:
# One name per existing row of col_df, in row order.
names = ["Mungo", "Greg", "Balthazar"]
col_df["Names"] = names  # assigning to a new column label appends that column
col_df

Unnamed: 0,numbers,a,b,c,Names
0,20,1,4,7,Mungo
1,21,2,5,8,Greg
2,300,3,6,9,Balthazar


Removing columns

In [None]:
# Remove columns 'a', 'b' and 'c'. `columns=` states which axis is being
# dropped (equivalent to axis=1), and the result is assigned back to col_df
# rather than mutating the frame with inplace=True.
col_df = col_df.drop(columns=['a', 'b', 'c'])
col_df

Unnamed: 0,numbers,Names
0,20,Mungo
1,21,Greg
2,300,Balthazar


Renaming columns

In [None]:
# The mapping is {old name: new name}; columns not listed keep their names.
# The result is assigned back to col_df instead of using inplace=True.
col_df = col_df.rename(columns={'numbers': 'Age'})
col_df

Unnamed: 0,Age,Names
0,20,Mungo
1,21,Greg
2,300,Balthazar


*Exercise 1: Add your name and age to col_df as a new row*

In [None]:
# A one-row DataFrame whose columns match col_df.
new_row1 = pd.DataFrame({"Age": [20], "Names": ["Hazel"]})
# axis="index" (the same as axis=0) stacks the new row underneath col_df.
# new_row1 keeps its own index label 0, so the result has two rows labelled 0;
# reset_index(drop=True) would renumber them.
new_df = pd.concat([col_df, new_row1], axis="index")
new_df



Unnamed: 0,Age,Names
0,20,Mungo
1,21,Greg
2,300,Balthazar
0,20,Hazel


*Exercise 2: Add a new column called 'Surname', with a made up surname for each character*

In [None]:
# One made-up surname per row of new_df, in row order.
sur = ["aaa", "bbb", "ccc", "ddd"]
new_df["Surname"] = sur  # assigning to a new column label appends that column
new_df

## Alternative one-liner: new_df['Surname'] = ['aaa','bbb','ccc','ddd']

Unnamed: 0,Age,Names,Surname
0,20,Mungo,aaa
1,21,Greg,bbb
2,300,Balthazar,ccc
0,20,Hazel,ddd


#### External data

Only the head (the first few rows) of each DataFrame is shown, for easier viewing

In [49]:
# Load the CSV into a DataFrame. The path is relative, so the file has to be
# in the directory the notebook is run from.
real_estate = pd.read_csv("real_estate_tutorial.csv")

In [50]:
# info() prints a summary: row count, each column's non-null count and dtype,
# and the memory usage.
real_estate.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9999 entries, 0 to 9998
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Serial Number     9999 non-null   int64  
 1   List Year         9999 non-null   int64  
 2   Date Recorded     9999 non-null   object 
 3   Town              9999 non-null   object 
 4   Address           9999 non-null   object 
 5   Assessed Value    9999 non-null   int64  
 6   Sale Amount       9999 non-null   float64
 7   Sales Ratio       9999 non-null   float64
 8   Property Type     9871 non-null   object 
 9   Residential Type  8925 non-null   object 
 10  Non Use Code      2378 non-null   object 
 11  Assessor Remarks  2328 non-null   object 
 12  OPM remarks       436 non-null    object 
 13  Location          4622 non-null   object 
dtypes: float64(2), int64(3), object(9)
memory usage: 1.1+ MB


Number of null values in each column

In [51]:
# isna() marks every missing cell as True; sum() then counts the True values
# in each column.
real_estate.isna().sum()

Serial Number          0
List Year              0
Date Recorded          0
Town                   0
Address                0
Assessed Value         0
Sale Amount            0
Sales Ratio            0
Property Type        128
Residential Type    1074
Non Use Code        7621
Assessor Remarks    7671
OPM remarks         9563
Location            5377
dtype: int64

Selecting part of dataframe

In [10]:
real_estate.head(5)  # the first five rows; 5 is also the default

Unnamed: 0,Serial Number,List Year,Date Recorded,Town,Address,Assessed Value,Sale Amount,Sales Ratio,Property Type,Residential Type,Non Use Code,Assessor Remarks,OPM remarks,Location
0,2020348,2020,09/13/2021,Ansonia,230 WAKELEE AVE,150500,325000.0,0.463,Commercial,,,,,
1,20002,2020,10/2/2020,Ashford,390 TURNPIKE RD,253000,430000.0,0.5883,Residential,Single Family,,,,
2,200212,2020,3/9/2021,Avon,5 CHESTNUT DRIVE,130400,179900.0,0.7248,Residential,Condo,,,,
3,200243,2020,04/13/2021,Avon,111 NORTHINGTON DRIVE,619290,890000.0,0.6958,Residential,Single Family,,,,
4,200377,2020,7/2/2021,Avon,70 FAR HILLS DRIVE,862330,1447500.0,0.5957,Residential,Single Family,,,,


In [11]:
# Passing a list of column names returns just those columns, in that order.
real_estate[["Serial Number", "List Year"]].head()

Unnamed: 0,Serial Number,List Year
0,2020348,2020
1,20002,2020
2,200212,2020
3,200243,2020
4,200377,2020


In [12]:
# The same selection with iloc: every row (:), and the first two columns (:2).
real_estate.iloc[:, :2].head()

Unnamed: 0,Serial Number,List Year
0,2020348,2020
1,20002,2020
2,200212,2020
3,200243,2020
4,200377,2020


In [13]:
real_estate.iloc[0,0] # iloc selects by integer position as [row, column]: here the first row of the first column

2020348

In [14]:
# The comparison yields one True/False per row; .loc keeps only the True rows.
real_estate.loc[real_estate["List Year"] == 2020].head()

Unnamed: 0,Serial Number,List Year,Date Recorded,Town,Address,Assessed Value,Sale Amount,Sales Ratio,Property Type,Residential Type,Non Use Code,Assessor Remarks,OPM remarks,Location
0,2020348,2020,09/13/2021,Ansonia,230 WAKELEE AVE,150500,325000.0,0.463,Commercial,,,,,
1,20002,2020,10/2/2020,Ashford,390 TURNPIKE RD,253000,430000.0,0.5883,Residential,Single Family,,,,
2,200212,2020,3/9/2021,Avon,5 CHESTNUT DRIVE,130400,179900.0,0.7248,Residential,Condo,,,,
3,200243,2020,04/13/2021,Avon,111 NORTHINGTON DRIVE,619290,890000.0,0.6958,Residential,Single Family,,,,
4,200377,2020,7/2/2021,Avon,70 FAR HILLS DRIVE,862330,1447500.0,0.5957,Residential,Single Family,,,,


In [15]:
# Select the rows and the column in a single .loc[rows, column] call instead
# of chaining two [] lookups. The result is the same 'Town' Series, limited to
# rows where 'List Year' is 2020.
real_estate.loc[real_estate['List Year'] == 2020, 'Town'].head()

0    Ansonia
1    Ashford
2       Avon
3       Avon
4       Avon
Name: Town, dtype: object

*Exercise 3: Show the address of all commercial properties*

In [16]:
# A single .loc[rows, columns] call replaces the chained [] lookups.
# Passing the column as a list (['Address']) returns a one-column DataFrame,
# so no to_frame() conversion is needed.
real_estate.loc[real_estate['Property Type'] == 'Commercial', ['Address']]

Unnamed: 0,Address
0,230 WAKELEE AVE
8,23 AMITY RD
13,119 MONTOWESE ST
29,131 KENT RD
31,1467 SOUTH ST
...,...
9867,140 TRUMBULL ST
9875,246 FEDERAL RD
9913,44 OLD RIDGEBURY RD
9946,59 WEST ST


*Exercise 4: Show all data for properties that are in Avon*

In [17]:
# Keep every column, but only the rows whose 'Town' is exactly 'Avon'.
real_estate.loc[real_estate["Town"] == "Avon"]

Unnamed: 0,Serial Number,List Year,Date Recorded,Town,Address,Assessed Value,Sale Amount,Sales Ratio,Property Type,Residential Type,Non Use Code,Assessor Remarks,OPM remarks,Location
2,200212,2020,3/9/2021,Avon,5 CHESTNUT DRIVE,130400,179900.0,0.7248,Residential,Condo,,,,
3,200243,2020,04/13/2021,Avon,111 NORTHINGTON DRIVE,619290,890000.0,0.6958,Residential,Single Family,,,,
4,200377,2020,7/2/2021,Avon,70 FAR HILLS DRIVE,862330,1447500.0,0.5957,Residential,Single Family,,,,
5,200109,2020,12/9/2020,Avon,57 FAR HILLS DRIVE,847520,1250000.0,0.6780,Residential,Single Family,,,,
93,10530,2001,9/12/2002,Avon,1 PUTNAM LN,97300,200000.0,0.4865,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9798,200225,2020,03/26/2021,Avon,38 COLD SPRING ROAD,290410,525000.0,0.5531,Residential,Single Family,,,,POINT (-72.87822 41.80228)
9821,200398,2020,07/15/2021,Avon,71 OLD WOOD ROAD,450510,800000.0,0.5631,Residential,Single Family,01 - Family,,,
9880,200074,2020,11/17/2020,Avon,31 CHEPACHET ROAD,216680,365000.0,0.5936,Residential,Single Family,,,,
9905,200472,2020,08/19/2021,Avon,12 CENTERBROOK COURT,191290,300000.0,0.6376,Residential,Condo,,,,POINT (-72.88741 41.77122)


*Exercise 5: Show the first 10 rows of the sales ratio and the address in one dataframe*

In [18]:
# Select the two columns by name, then keep the first ten rows.
real_estate.loc[:, ["Sales Ratio", "Address"]].head(10)

## Alternative: use .iloc[0:10] in place of .head(10)

Unnamed: 0,Sales Ratio,Address
0,0.463,230 WAKELEE AVE
1,0.5883,390 TURNPIKE RD
2,0.7248,5 CHESTNUT DRIVE
3,0.6958,111 NORTHINGTON DRIVE
4,0.5957,70 FAR HILLS DRIVE
5,0.678,57 FAR HILLS DRIVE
6,1.8015,1539 FARMINGTON AVE
7,0.6081,216 WATCH HILL RD
8,0.6427,23 AMITY RD
9,0.5115,16 DEEPWOOD DRIVE
