# Pandas

In [72]:
import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import style
# Apply matplotlib's 'ggplot' style sheet to the figures drawn in this notebook.
style.use('ggplot')

## Reading from CSV files

In [76]:
# Load the CSV file into a DataFrame (the path is relative to the working directory).
csv_data = pd.read_csv('data.csv')

In [77]:
# Preview the first five rows (five is head()'s default).
csv_data.head(n=5)

Unnamed: 0,Duration,Date,Pulse,Maxpulse,Calories
0,60,'2020/12/01',110,130,409.1
1,60,'2020/12/02',117,145,479.0
2,60,'2020/12/03',103,135,340.0
3,45,'2020/12/04',109,175,282.4
4,45,'2020/12/05',117,148,406.0


## Dataframes from dictionaries

In [111]:
# Each dictionary key becomes a column name and each list becomes that column's
# values, so all lists must have the same length.
stats = {
    'Day': [1, 2, 3, 4, 5, 6],
    'Visitors': [23, 43, 23, 35, 64, 12],
    'Bounce_rate': [65, 75, 68, 60, 74, 75],
}

df = pd.DataFrame(data=stats)

In [112]:
# Display the frame; no index was specified, so the rows are labelled 0..5 by default.
df

Unnamed: 0,Day,Visitors,Bounce_rate
0,1,23,65
1,2,43,75
2,3,23,68
3,4,35,60
4,5,64,74
5,6,12,75


## Indexing & Accessing Columns

In [98]:
# Any column that meaningfully identifies the rows can be used as the index.
# set_index also accepts a list of columns (a multi-level index), although
# that is rarely needed.
# set_index returns a new frame, hence the reassignment; the in-place
# alternative is df.set_index('Day', inplace=True).
# Note: re-running this cell raises a KeyError because 'Day' is then no
# longer a column.
df = df.set_index(keys='Day')
df

Unnamed: 0_level_0,Visitors,Bounce_rate
Day,Unnamed: 1_level_1,Unnamed: 2_level_1
1,23,65
2,43,75
3,23,68
4,35,60
5,64,74
6,12,75


In [99]:
# Accessing a single column with bracket notation returns a Series
# that keeps the frame's index ('Day').
df['Visitors']

Day
1    23
2    43
3    23
4    35
5    64
6    12
Name: Visitors, dtype: int64

In [100]:
# Attribute access is shorthand for df['Visitors']; it returns the same Series.
df.Visitors

Day
1    23
2    43
3    23
4    35
5    64
6    12
Name: Visitors, dtype: int64

In [101]:
# Accessing multiple columns: pass a list of names (note the double brackets);
# the result is a DataFrame, not a Series.
df[['Visitors','Bounce_rate']]

Unnamed: 0_level_0,Visitors,Bounce_rate
Day,Unnamed: 1_level_1,Unnamed: 2_level_1
1,23,65
2,43,75
3,23,68
4,35,60
5,64,74
6,12,75


In [102]:
# Converting a column to a plain Python list.
df.Visitors.tolist()  # for a NumPy array use np.array(df.Visitors) instead

[23, 43, 23, 35, 64, 12]

In [103]:
# Converting several columns to a 2-D NumPy array: one row per DataFrame row,
# one column per selected column. The index and column labels are not carried over.
# (numpy is imported once, with the other imports at the top of the notebook.)
np.array(df[['Visitors','Bounce_rate']])

array([[23, 65],
       [43, 75],
       [23, 68],
       [35, 60],
       [64, 74],
       [12, 75]])

In [104]:
# NumPy array back to a DataFrame. A bare array carries no labels, so both
# the column names and the index fall back to default integers (0, 1, ...).
data = pd.DataFrame(data=np.array(df[['Visitors', 'Bounce_rate']]))
data

Unnamed: 0,0,1
0,23,65
1,43,75
2,23,68
3,35,60
4,64,74
5,12,75


## IO Basics

In [113]:
# Load the housing listings and preview the first five rows.
df = pd.read_csv('housing_prices.csv')
df.head(n=5)

Unnamed: 0,BROKERTITLE,TYPE,PRICE,BEDS,BATH,PROPERTYSQFT,ADDRESS,STATE,MAIN_ADDRESS,ADMINISTRATIVE_AREA_LEVEL_2,LOCALITY,SUBLOCALITY,STREET_NAME,LONG_NAME,FORMATTED_ADDRESS,LATITUDE,LONGITUDE
0,Brokered by Douglas Elliman -111 Fifth Ave,Condo for sale,315000,2,2.0,1400.0,2 E 55th St Unit 803,"New York, NY 10022","2 E 55th St Unit 803New York, NY 10022",New York County,New York,Manhattan,East 55th Street,Regis Residence,"Regis Residence, 2 E 55th St #803, New York, N...",40.761255,-73.974483
1,Brokered by Serhant,Condo for sale,195000000,7,10.0,17545.0,Central Park Tower Penthouse-217 W 57th New Yo...,"New York, NY 10019",Central Park Tower Penthouse-217 W 57th New Yo...,United States,New York,New York County,New York,West 57th Street,"217 W 57th St, New York, NY 10019, USA",40.766393,-73.980991
2,Brokered by Sowae Corp,House for sale,260000,4,2.0,2015.0,620 Sinclair Ave,"Staten Island, NY 10312","620 Sinclair AveStaten Island, NY 10312",United States,New York,Richmond County,Staten Island,Sinclair Avenue,"620 Sinclair Ave, Staten Island, NY 10312, USA",40.541805,-74.196109
3,Brokered by COMPASS,Condo for sale,69000,3,1.0,445.0,2 E 55th St Unit 908W33,"Manhattan, NY 10022","2 E 55th St Unit 908W33Manhattan, NY 10022",United States,New York,New York County,New York,East 55th Street,"2 E 55th St, New York, NY 10022, USA",40.761398,-73.974613
4,Brokered by Sotheby's International Realty - E...,Townhouse for sale,55000000,7,2.373861,14175.0,5 E 64th St,"New York, NY 10065","5 E 64th StNew York, NY 10065",United States,New York,New York County,New York,East 64th Street,"5 E 64th St, New York, NY 10065, USA",40.767224,-73.969856


In [114]:
# Promote BROKERTITLE from a regular column to the row index.
# The result is reassigned rather than mutating the frame in place.
df = df.set_index('BROKERTITLE')
df.head(n=5)

Unnamed: 0_level_0,TYPE,PRICE,BEDS,BATH,PROPERTYSQFT,ADDRESS,STATE,MAIN_ADDRESS,ADMINISTRATIVE_AREA_LEVEL_2,LOCALITY,SUBLOCALITY,STREET_NAME,LONG_NAME,FORMATTED_ADDRESS,LATITUDE,LONGITUDE
BROKERTITLE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Brokered by Douglas Elliman -111 Fifth Ave,Condo for sale,315000,2,2.0,1400.0,2 E 55th St Unit 803,"New York, NY 10022","2 E 55th St Unit 803New York, NY 10022",New York County,New York,Manhattan,East 55th Street,Regis Residence,"Regis Residence, 2 E 55th St #803, New York, N...",40.761255,-73.974483
Brokered by Serhant,Condo for sale,195000000,7,10.0,17545.0,Central Park Tower Penthouse-217 W 57th New Yo...,"New York, NY 10019",Central Park Tower Penthouse-217 W 57th New Yo...,United States,New York,New York County,New York,West 57th Street,"217 W 57th St, New York, NY 10019, USA",40.766393,-73.980991
Brokered by Sowae Corp,House for sale,260000,4,2.0,2015.0,620 Sinclair Ave,"Staten Island, NY 10312","620 Sinclair AveStaten Island, NY 10312",United States,New York,Richmond County,Staten Island,Sinclair Avenue,"620 Sinclair Ave, Staten Island, NY 10312, USA",40.541805,-74.196109
Brokered by COMPASS,Condo for sale,69000,3,1.0,445.0,2 E 55th St Unit 908W33,"Manhattan, NY 10022","2 E 55th St Unit 908W33Manhattan, NY 10022",United States,New York,New York County,New York,East 55th Street,"2 E 55th St, New York, NY 10022, USA",40.761398,-73.974613
Brokered by Sotheby's International Realty - East Side Manhattan Brokerage,Townhouse for sale,55000000,7,2.373861,14175.0,5 E 64th St,"New York, NY 10065","5 E 64th StNew York, NY 10065",United States,New York,New York County,New York,East 64th Street,"5 E 64th St, New York, NY 10065, USA",40.767224,-73.969856


In [115]:
# The CSV format has no notion of an index, so to_csv writes the index
# (BROKERTITLE here) as an ordinary first column; when the file is read back
# the index column has to be named again via index_col.
df.to_csv('newcsv.csv')

# Other writers exist as well: to_html, to_json, to_excel, etc.
# df.to_csv('newcsv.csv', header=False) writes only the data rows, without the column names.

In [None]:
# Read the file back without index_col: the saved index is not restored
# automatically and pandas assigns a default integer index.
pd.read_csv('newcsv.csv')

In [None]:
# Pass index_col to use the BROKERTITLE column as the index while reading.
pd.read_csv('newcsv.csv',index_col='BROKERTITLE')

## Renaming columns

In [132]:
# Start from a fresh copy of the exercise data for the renaming examples.
df = pd.read_csv('data.csv')
df.head(n=5)

Unnamed: 0,Duration,Date,Pulse,Maxpulse,Calories
0,60,'2020/12/01',110,130,409.1
1,60,'2020/12/02',117,145,479.0
2,60,'2020/12/03',103,135,340.0
3,45,'2020/12/04',109,175,282.4
4,45,'2020/12/05',117,148,406.0


In [133]:
# Use the Date column as the index; reassign instead of mutating in place.
df = df.set_index('Date')
df.head(n=5)


Unnamed: 0_level_0,Duration,Pulse,Maxpulse,Calories
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
'2020/12/01',60,110,130,409.1
'2020/12/02',60,117,145,479.0
'2020/12/03',60,103,135,340.0
'2020/12/04',45,109,175,282.4
'2020/12/05',45,117,148,406.0


In [134]:
# Assigning to df.columns replaces every column label positionally, so the list
# needs exactly one name per existing column, in the existing column order.
df.columns = ['Time spent','pulse','Maxvalue','Calories']
df.head()

Unnamed: 0_level_0,Time spent,pulse,Maxvalue,Calories
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
'2020/12/01',60,110,130,409.1
'2020/12/02',60,117,145,479.0
'2020/12/03',60,103,135,340.0
'2020/12/04',45,109,175,282.4
'2020/12/05',45,117,148,406.0


In [136]:
# rename changes only the labels listed in the mapping (old name -> new name);
# all other columns keep their names.
df = df.rename(columns={'Time spent': 'Duration'})
df.head(n=5)

Unnamed: 0_level_0,Duration,pulse,Maxvalue,Calories
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
'2020/12/01',60,110,130,409.1
'2020/12/02',60,117,145,479.0
'2020/12/03',60,103,135,340.0
'2020/12/04',45,109,175,282.4
'2020/12/05',45,117,148,406.0


## Concatenating / Appending Dataframes

In [137]:
# Three small example frames for the concatenation demos:
#   df1 and df2 share the same columns but cover different years;
#   df3 covers the same years as df1 but has Low_tier_HPI instead of US_GDP_Thousands.
df1 = pd.DataFrame(
    data=dict(
        HPI=[80, 85, 88, 85],
        Int_rate=[2, 3, 2, 2],
        US_GDP_Thousands=[50, 55, 65, 55],
    ),
    index=list(range(2001, 2005)),
)

df2 = pd.DataFrame(
    data=dict(
        HPI=[80, 85, 88, 85],
        Int_rate=[2, 3, 2, 2],
        US_GDP_Thousands=[50, 55, 65, 55],
    ),
    index=list(range(2005, 2009)),
)

df3 = pd.DataFrame(
    data=dict(
        HPI=[80, 85, 88, 85],
        Int_rate=[2, 3, 2, 2],
        Low_tier_HPI=[50, 52, 50, 53],
    ),
    index=list(range(2001, 2005)),
)

In [138]:
# Stack the two frames vertically (axis=0 is the default); they have identical
# columns, so the rows of df2 simply follow the rows of df1.
concat = pd.concat([df1, df2], axis=0)
concat

Unnamed: 0,HPI,Int_rate,US_GDP_Thousands
2001,80,2,50
2002,85,3,55
2003,88,2,65
2004,85,2,55
2005,80,2,50
2006,85,3,55
2007,88,2,65
2008,85,2,55


In [139]:
# df3 has a column the others lack (and vice versa): the result contains the
# union of all columns and the missing cells are filled with NaN.
# The index labels 2001-2004 now occur twice, because concat keeps each
# frame's own index.
concat = pd.concat([df1, df2, df3], axis=0)
concat

Unnamed: 0,HPI,Int_rate,US_GDP_Thousands,Low_tier_HPI
2001,80,2,50.0,
2002,85,3,55.0,
2003,88,2,65.0,
2004,85,2,55.0,
2005,80,2,50.0,
2006,85,3,55.0,
2007,88,2,65.0,
2008,85,2,55.0,
2001,80,2,,50.0
2002,85,3,,52.0


## Count Data in dataframes

In [164]:
# Reload the housing listings and display the frame; for a long frame the
# rendering is truncated to the first and last rows.
df = pd.read_csv('housing_prices.csv')
df

Unnamed: 0,BROKERTITLE,TYPE,PRICE,BEDS,BATH,PROPERTYSQFT,ADDRESS,STATE,MAIN_ADDRESS,ADMINISTRATIVE_AREA_LEVEL_2,LOCALITY,SUBLOCALITY,STREET_NAME,LONG_NAME,FORMATTED_ADDRESS,LATITUDE,LONGITUDE
0,Brokered by Douglas Elliman -111 Fifth Ave,Condo for sale,315000,2,2.000000,1400.000000,2 E 55th St Unit 803,"New York, NY 10022","2 E 55th St Unit 803New York, NY 10022",New York County,New York,Manhattan,East 55th Street,Regis Residence,"Regis Residence, 2 E 55th St #803, New York, N...",40.761255,-73.974483
1,Brokered by Serhant,Condo for sale,195000000,7,10.000000,17545.000000,Central Park Tower Penthouse-217 W 57th New Yo...,"New York, NY 10019",Central Park Tower Penthouse-217 W 57th New Yo...,United States,New York,New York County,New York,West 57th Street,"217 W 57th St, New York, NY 10019, USA",40.766393,-73.980991
2,Brokered by Sowae Corp,House for sale,260000,4,2.000000,2015.000000,620 Sinclair Ave,"Staten Island, NY 10312","620 Sinclair AveStaten Island, NY 10312",United States,New York,Richmond County,Staten Island,Sinclair Avenue,"620 Sinclair Ave, Staten Island, NY 10312, USA",40.541805,-74.196109
3,Brokered by COMPASS,Condo for sale,69000,3,1.000000,445.000000,2 E 55th St Unit 908W33,"Manhattan, NY 10022","2 E 55th St Unit 908W33Manhattan, NY 10022",United States,New York,New York County,New York,East 55th Street,"2 E 55th St, New York, NY 10022, USA",40.761398,-73.974613
4,Brokered by Sotheby's International Realty - E...,Townhouse for sale,55000000,7,2.373861,14175.000000,5 E 64th St,"New York, NY 10065","5 E 64th StNew York, NY 10065",United States,New York,New York County,New York,East 64th Street,"5 E 64th St, New York, NY 10065, USA",40.767224,-73.969856
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4796,Brokered by COMPASS,Co-op for sale,599000,1,1.000000,2184.207862,222 E 80th St Apt 3A,"Manhattan, NY 10075","222 E 80th St Apt 3AManhattan, NY 10075",New York,New York County,New York,Manhattan,222,"222 E 80th St #3a, New York, NY 10075, USA",40.774350,-73.955879
4797,Brokered by Mjr Real Estate Llc,Co-op for sale,245000,1,1.000000,2184.207862,97-40 62 Dr Unit Lg,"Rego Park, NY 11374","97-40 62 Dr Unit LgRego Park, NY 11374",United States,New York,Queens County,Queens,62nd Drive,"97-40 62nd Dr, Rego Park, NY 11374, USA",40.732538,-73.860152
4798,Brokered by Douglas Elliman - 575 Madison Ave,Co-op for sale,1275000,1,1.000000,2184.207862,427 W 21st St Unit Garden,"New York, NY 10011","427 W 21st St Unit GardenNew York, NY 10011",United States,New York,New York County,New York,West 21st Street,"427 W 21st St, New York, NY 10011, USA",40.745882,-74.003398
4799,Brokered by E Realty International Corp,Condo for sale,598125,2,1.000000,655.000000,91-23 Corona Ave Unit 4G,"Elmhurst, NY 11373","91-23 Corona Ave Unit 4GElmhurst, NY 11373",New York,Queens County,Queens,Flushing,91-23,"91-23 Corona Ave. #4b, Flushing, NY 11373, USA",40.742770,-73.872752


In [154]:
# Count how many rows carry each distinct STATE value, most frequent first.
df['STATE'].value_counts()

STATE
Brooklyn, NY 11235            102
Brooklyn, NY 11209             94
Forest Hills, NY 11375         91
Brooklyn, NY 11234             90
Staten Island, NY 10314        86
                             ... 
Queens, NY 11427                1
Queens, NY 11429                1
Long Island City, NY 11109      1
Brooklyn, NY 11694              1
Ozone Park, NY 11416            1
Name: count, Length: 308, dtype: int64

In [155]:
# Same counts, ordered from the least to the most frequent value.
df['STATE'].value_counts(ascending=True)

STATE
Ozone Park, NY 11416            1
Long Island City, NY 11109      1
Queens, NY 11697                1
Queens, NY 11429                1
Ny, NY 10017                    1
                             ... 
Staten Island, NY 10314        86
Brooklyn, NY 11234             90
Forest Hills, NY 11375         91
Brooklyn, NY 11209             94
Brooklyn, NY 11235            102
Name: count, Length: 308, dtype: int64

In [158]:
# Relative frequency: each count divided by the total, so the values sum to 1.
df['STATE'].value_counts(normalize=True)

STATE
Brooklyn, NY 11235            0.021246
Brooklyn, NY 11209            0.019579
Forest Hills, NY 11375        0.018954
Brooklyn, NY 11234            0.018746
Staten Island, NY 10314       0.017913
                                ...   
Queens, NY 11427              0.000208
Queens, NY 11429              0.000208
Long Island City, NY 11109    0.000208
Brooklyn, NY 11694            0.000208
Ozone Park, NY 11416          0.000208
Name: proportion, Length: 308, dtype: float64

In [168]:
# Count for one specific value: value_counts() is indexed by the distinct
# values, so the count can be looked up by label.
df['STATE'].value_counts()['Arverne, NY 11692']

6

In [166]:
# Rows per group: size() returns one number per distinct STATE value
# (a Series ordered by the group key).
df.groupby('STATE').size()

STATE
Arverne, NY 11692                6
Astoria, NY 11101                2
Astoria, NY 11102                1
Astoria, NY 11103                7
Astoria, NY 11105               19
                                ..
Stuyvesant Heights, NY 11233     2
Sunnyside, NY 11104              3
Whitestone, NY 11357            36
Woodhaven, NY 11421             17
Woodside, NY 11377              36
Length: 308, dtype: int64

In [167]:
# Non-null values per group: count() returns a DataFrame with, for every
# remaining column, the number of non-missing entries in each STATE group.
df.groupby('STATE').count()

Unnamed: 0_level_0,BROKERTITLE,TYPE,PRICE,BEDS,BATH,PROPERTYSQFT,ADDRESS,MAIN_ADDRESS,ADMINISTRATIVE_AREA_LEVEL_2,LOCALITY,SUBLOCALITY,STREET_NAME,LONG_NAME,FORMATTED_ADDRESS,LATITUDE,LONGITUDE
STATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
"Arverne, NY 11692",6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6
"Astoria, NY 11101",2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
"Astoria, NY 11102",1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
"Astoria, NY 11103",7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
"Astoria, NY 11105",19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Stuyvesant Heights, NY 11233",2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
"Sunnyside, NY 11104",3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3
"Whitestone, NY 11357",36,36,36,36,36,36,36,36,36,36,36,36,36,36,36,36
"Woodhaven, NY 11421",17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17


## Adding new Columns

In [171]:
# Reload the exercise data for the column-adding examples and display it.
df = pd.read_csv('data.csv')
df

Unnamed: 0,Duration,Date,Pulse,Maxpulse,Calories
0,60,'2020/12/01',110,130,409.1
1,60,'2020/12/02',117,145,479.0
2,60,'2020/12/03',103,135,340.0
3,45,'2020/12/04',109,175,282.4
4,45,'2020/12/05',117,148,406.0
5,60,'2020/12/06',102,127,300.0
6,60,'2020/12/07',110,136,374.0
7,450,'2020/12/08',104,134,253.3
8,30,'2020/12/09',109,133,195.1
9,60,'2020/12/10',98,124,269.0


In [177]:
# Add a column from a plain Python list. The list needs exactly one value per
# row, so its length is taken from the frame instead of being hard-coded.
# The values are random integers from 3 to 9 and no seed is set, so they
# differ on every run.
speed = np.random.randint(3, 10, len(df)).tolist()
print(speed)

df['speed'] = speed
df.head()

[4, 8, 9, 3, 3, 8, 6, 6, 8, 3, 7, 9, 4, 8, 7, 3, 9, 6, 4, 7, 3, 3, 8, 5, 7, 6, 3, 8, 6, 9, 3, 3]


Unnamed: 0,Duration,Date,Pulse,Maxpulse,Calories,speed
0,60,'2020/12/01',110,130,409.1,4
1,60,'2020/12/02',117,145,479.0,8
2,60,'2020/12/03',103,135,340.0,9
3,45,'2020/12/04',109,175,282.4,3
4,45,'2020/12/05',117,148,406.0,3


In [179]:
# Add a column made entirely of missing values. Assigning a scalar broadcasts
# it to every row, so no helper list of len(df) NaNs has to be built.
df['fat/slim'] = np.nan
df.head()

Unnamed: 0,Duration,Date,Pulse,Maxpulse,Calories,speed,fat/slim
0,60,'2020/12/01',110,130,409.1,4,
1,60,'2020/12/02',117,145,479.0,8,
2,60,'2020/12/03',103,135,340.0,9,
3,45,'2020/12/04',109,175,282.4,3,
4,45,'2020/12/05',117,148,406.0,3,


In [180]:
# insert adds a column at a chosen position (0 = first column) and modifies the
# frame in place. The number of random weights is taken from the frame so it
# always matches the row count.
# Note: re-running this cell raises a ValueError because 'Weight' already exists.
df.insert(0, "Weight", np.random.randint(60, 90, len(df)).tolist())
df.head()

Unnamed: 0,Weight,Duration,Date,Pulse,Maxpulse,Calories,speed,fat/slim
0,63,60,'2020/12/01',110,130,409.1,4,
1,75,60,'2020/12/02',117,145,479.0,8,
2,89,60,'2020/12/03',103,135,340.0,9,
3,86,45,'2020/12/04',109,175,282.4,3,
4,88,45,'2020/12/05',117,148,406.0,3,


In [182]:
# assign returns a new DataFrame with the extra column and leaves df unchanged.
# The scalar NaN is broadcast to every row.
newdf = df.assign(test=np.nan)
newdf.head()

Unnamed: 0,Weight,Duration,Date,Pulse,Maxpulse,Calories,speed,fat/slim,test
0,63,60,'2020/12/01',110,130,409.1,4,,
1,75,60,'2020/12/02',117,145,479.0,8,,
2,89,60,'2020/12/03',103,135,340.0,9,,
3,86,45,'2020/12/04',109,175,282.4,3,,
4,88,45,'2020/12/05',117,148,406.0,3,,


## Remove columns and rows

In [184]:
# Fresh copy of the exercise data for the removal examples.
df = pd.read_csv('data.csv')
df.head(n=5)

Unnamed: 0,Duration,Date,Pulse,Maxpulse,Calories
0,60,'2020/12/01',110,130,409.1
1,60,'2020/12/02',117,145,479.0
2,60,'2020/12/03',103,135,340.0
3,45,'2020/12/04',109,175,282.4
4,45,'2020/12/05',117,148,406.0


In [187]:
# Remove a column. axis='columns' (the same as axis=1) targets the column
# labels across the top of the frame rather than the row labels.
# drop returns a new frame and leaves df untouched: assign the result back
# (df = df.drop(...)) or pass inplace=True to keep the change.
df.drop(labels='Maxpulse', axis='columns').head()

Unnamed: 0,Duration,Date,Pulse,Calories
0,60,'2020/12/01',110,409.1
1,60,'2020/12/02',117,479.0
2,60,'2020/12/03',103,340.0
3,45,'2020/12/04',109,282.4
4,45,'2020/12/05',117,406.0


In [190]:
# Remove a row by its index label: the row labelled 1 is absent from the result.
# As with columns, df itself is unchanged unless the result is assigned back.
df.drop(labels=1, axis='index')

Unnamed: 0,Duration,Date,Pulse,Maxpulse,Calories
0,60,'2020/12/01',110,130,409.1
2,60,'2020/12/03',103,135,340.0
3,45,'2020/12/04',109,175,282.4
4,45,'2020/12/05',117,148,406.0
5,60,'2020/12/06',102,127,300.0
6,60,'2020/12/07',110,136,374.0
7,450,'2020/12/08',104,134,253.3
8,30,'2020/12/09',109,133,195.1
9,60,'2020/12/10',98,124,269.0
10,60,'2020/12/11',103,147,329.3


## Grab rows, points, subsets from dataframe

In [192]:
# Fresh copy of the exercise data for the selection examples.
df = pd.read_csv('data.csv')
df.head(n=5)

Unnamed: 0,Duration,Date,Pulse,Maxpulse,Calories
0,60,'2020/12/01',110,130,409.1
1,60,'2020/12/02',117,145,479.0
2,60,'2020/12/03',103,135,340.0
3,45,'2020/12/04',109,175,282.4
4,45,'2020/12/05',117,148,406.0


In [193]:
# Grab a row by index *label* with .loc; the row comes back as a Series
# whose index holds the column names.
df.loc[2]

Duration              60
Date        '2020/12/03'
Pulse                103
Maxpulse             135
Calories           340.0
Name: 2, dtype: object

In [195]:
# Grab a row by integer *position* with .iloc. With the default 0..n-1 index
# the label 2 and the position 2 refer to the same row, hence the identical output.
df.iloc[2]

Duration              60
Date        '2020/12/03'
Pulse                103
Maxpulse             135
Calories           340.0
Name: 2, dtype: object

In [196]:
# Grab a single value: .loc[row label, column label].
df.loc[1,'Pulse']

117

In [197]:
# Grab a subset: a list of row labels and a list of column labels
# select the corresponding sub-frame.
df.loc[[1,3],['Pulse','Calories']]

Unnamed: 0,Pulse,Calories
1,117,479.0
3,109,282.4


## Conditional selections

In [200]:
# Reload the housing listings for the conditional-selection examples.
df = pd.read_csv('housing_prices.csv')
df.head(n=5)

Unnamed: 0,BROKERTITLE,TYPE,PRICE,BEDS,BATH,PROPERTYSQFT,ADDRESS,STATE,MAIN_ADDRESS,ADMINISTRATIVE_AREA_LEVEL_2,LOCALITY,SUBLOCALITY,STREET_NAME,LONG_NAME,FORMATTED_ADDRESS,LATITUDE,LONGITUDE
0,Brokered by Douglas Elliman -111 Fifth Ave,Condo for sale,315000,2,2.0,1400.0,2 E 55th St Unit 803,"New York, NY 10022","2 E 55th St Unit 803New York, NY 10022",New York County,New York,Manhattan,East 55th Street,Regis Residence,"Regis Residence, 2 E 55th St #803, New York, N...",40.761255,-73.974483
1,Brokered by Serhant,Condo for sale,195000000,7,10.0,17545.0,Central Park Tower Penthouse-217 W 57th New Yo...,"New York, NY 10019",Central Park Tower Penthouse-217 W 57th New Yo...,United States,New York,New York County,New York,West 57th Street,"217 W 57th St, New York, NY 10019, USA",40.766393,-73.980991
2,Brokered by Sowae Corp,House for sale,260000,4,2.0,2015.0,620 Sinclair Ave,"Staten Island, NY 10312","620 Sinclair AveStaten Island, NY 10312",United States,New York,Richmond County,Staten Island,Sinclair Avenue,"620 Sinclair Ave, Staten Island, NY 10312, USA",40.541805,-74.196109
3,Brokered by COMPASS,Condo for sale,69000,3,1.0,445.0,2 E 55th St Unit 908W33,"Manhattan, NY 10022","2 E 55th St Unit 908W33Manhattan, NY 10022",United States,New York,New York County,New York,East 55th Street,"2 E 55th St, New York, NY 10022, USA",40.761398,-73.974613
4,Brokered by Sotheby's International Realty - E...,Townhouse for sale,55000000,7,2.373861,14175.0,5 E 64th St,"New York, NY 10065","5 E 64th StNew York, NY 10065",United States,New York,New York County,New York,East 64th Street,"5 E 64th St, New York, NY 10065, USA",40.767224,-73.969856


In [201]:
# Compare every cell of the frame with the string. The result keeps the
# original shape: matching cells keep their value, all others become NaN.
df.where(df == "Condo for sale")

Unnamed: 0,BROKERTITLE,TYPE,PRICE,BEDS,BATH,PROPERTYSQFT,ADDRESS,STATE,MAIN_ADDRESS,ADMINISTRATIVE_AREA_LEVEL_2,LOCALITY,SUBLOCALITY,STREET_NAME,LONG_NAME,FORMATTED_ADDRESS,LATITUDE,LONGITUDE
0,,Condo for sale,,,,,,,,,,,,,,,
1,,Condo for sale,,,,,,,,,,,,,,,
2,,,,,,,,,,,,,,,,,
3,,Condo for sale,,,,,,,,,,,,,,,
4,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4796,,,,,,,,,,,,,,,,,
4797,,,,,,,,,,,,,,,,,
4798,,,,,,,,,,,,,,,,,
4799,,Condo for sale,,,,,,,,,,,,,,,


In [212]:
# Same masking restricted to the TYPE column: matching entries keep their
# value and the rest become NaN. Only this one column is compared, instead of
# masking the whole frame and then picking the column out of the result.
df['TYPE'].where(df['TYPE'] == 'Condo for sale')

0       Condo for sale
1       Condo for sale
2                  NaN
3       Condo for sale
4                  NaN
             ...      
4796               NaN
4797               NaN
4798               NaN
4799    Condo for sale
4800               NaN
Name: TYPE, Length: 4801, dtype: object

In [203]:
# Filter rows: a boolean Series built from one column keeps only the rows
# where the condition is True (all columns are retained).
df.loc[df['TYPE'] == 'Condo for sale'].head()

Unnamed: 0,BROKERTITLE,TYPE,PRICE,BEDS,BATH,PROPERTYSQFT,ADDRESS,STATE,MAIN_ADDRESS,ADMINISTRATIVE_AREA_LEVEL_2,LOCALITY,SUBLOCALITY,STREET_NAME,LONG_NAME,FORMATTED_ADDRESS,LATITUDE,LONGITUDE
0,Brokered by Douglas Elliman -111 Fifth Ave,Condo for sale,315000,2,2.0,1400.0,2 E 55th St Unit 803,"New York, NY 10022","2 E 55th St Unit 803New York, NY 10022",New York County,New York,Manhattan,East 55th Street,Regis Residence,"Regis Residence, 2 E 55th St #803, New York, N...",40.761255,-73.974483
1,Brokered by Serhant,Condo for sale,195000000,7,10.0,17545.0,Central Park Tower Penthouse-217 W 57th New Yo...,"New York, NY 10019",Central Park Tower Penthouse-217 W 57th New Yo...,United States,New York,New York County,New York,West 57th Street,"217 W 57th St, New York, NY 10019, USA",40.766393,-73.980991
3,Brokered by COMPASS,Condo for sale,69000,3,1.0,445.0,2 E 55th St Unit 908W33,"Manhattan, NY 10022","2 E 55th St Unit 908W33Manhattan, NY 10022",United States,New York,New York County,New York,East 55th Street,"2 E 55th St, New York, NY 10022, USA",40.761398,-73.974613
6,Brokered by Douglas Elliman - 575 Madison Ave,Condo for sale,899500,2,2.0,2184.207862,157 W 126th St Unit 1B,"New York, NY 10027","157 W 126th St Unit 1BNew York, NY 10027",New York,New York County,New York,Manhattan,157,"157 W 126th St #1b, New York, NY 10027, USA",40.809448,-73.946777
15,Brokered by Awaye Realty,Condo for sale,549000,2,2.0,1000.0,4654 Amboy Rd Unit 2B,"Staten Island, NY 10312","4654 Amboy Rd Unit 2BStaten Island, NY 10312",New York,Richmond County,Staten Island,Annadale,4654,"4654 Amboy Rd #2b, Staten Island, NY 10312, USA",40.540621,-74.167163


In [210]:
# Multiple conditions: combine boolean Series with & (and) or | (or).
# Each comparison needs its own parentheses because & binds tighter than ==.
df.loc[(df['TYPE'] == 'Condo for sale') & (df['BEDS'] == 3)]

Unnamed: 0,BROKERTITLE,TYPE,PRICE,BEDS,BATH,PROPERTYSQFT,ADDRESS,STATE,MAIN_ADDRESS,ADMINISTRATIVE_AREA_LEVEL_2,LOCALITY,SUBLOCALITY,STREET_NAME,LONG_NAME,FORMATTED_ADDRESS,LATITUDE,LONGITUDE
3,Brokered by COMPASS,Condo for sale,69000,3,1.000000,445.0,2 E 55th St Unit 908W33,"Manhattan, NY 10022","2 E 55th St Unit 908W33Manhattan, NY 10022",United States,New York,New York County,New York,East 55th Street,"2 E 55th St, New York, NY 10022, USA",40.761398,-73.974613
70,Brokered by Serhant,Condo for sale,1595000,3,3.000000,1413.0,924 Lafayette Ave Unit Ph,"Brooklyn, NY 11221","924 Lafayette Ave Unit PhBrooklyn, NY 11221",United States,New York,Kings County,Brooklyn,Lafayette Avenue,"924 Lafayette Ave, Brooklyn, NY 11221, USA",40.691514,-73.935444
71,Brokered by Douglas Elliman - 575 Madison Ave,Condo for sale,655000,3,2.000000,803.0,1504 Jefferson Ave Unit Gardenb,"Brooklyn, NY 11237","1504 Jefferson Ave Unit GardenbBrooklyn, NY 11237",United States,New York,Kings County,Brooklyn,Jefferson Avenue,"1504 Jefferson Ave, Brooklyn, NY 11237, USA",40.696727,-73.907404
76,Brokered by Corcoran Chelsea/Flatiron,Condo for sale,31000000,3,4.000000,4492.0,111 W 57th St # 50,"New York, NY 10019","111 W 57th St # 50New York, NY 10019",New York,New York County,New York,Manhattan,111,"111 W 57th St #50, New York, NY 10019, USA",40.764953,-73.977568
83,Brokered by Brown Harris Stevens - 1926 Broadway,Condo for sale,5750000,3,2.000000,2223.0,250 E 21st St Unit 12B,"Manhattan, NY 10010","250 E 21st St Unit 12BManhattan, NY 10010",New York,New York County,New York,Manhattan,250,"250 E 21st St #12b, New York, NY 10010, USA",40.736608,-73.982282
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4729,Brokered by City Connections Realty,Condo for sale,529000,3,1.000000,367.0,1 Wall Street Ct Apt 1406,"New York, NY 10005","1 Wall Street Ct Apt 1406New York, NY 10005",New York,New York County,New York,Manhattan,1,"1 Wall St APT 1406, New York, NY 10005, USA",40.705216,-74.008531
4770,Brokered by Sotheby's International Realty - E...,Condo for sale,2600000,3,2.000000,1729.0,242 E 15th St Apt 1,"New York, NY 10003","242 E 15th St Apt 1New York, NY 10003",New York,New York County,New York,Manhattan,242,"242 E 15th St APT 1, New York, NY 10003, USA",40.733031,-73.985037
4782,Brokered by Sotheby's International Realty - E...,Condo for sale,2600000,3,2.000000,1729.0,242 E 15th St Apt 1,"New York, NY 10003","242 E 15th St Apt 1New York, NY 10003",New York,New York County,New York,Manhattan,242,"242 E 15th St APT 1, New York, NY 10003, USA",40.733031,-73.985037
4787,Brokered by REAL NEW YORK - Remote Brooklyn,Condo for sale,499000,3,1.000000,472.0,127 W 112th St Apt 1B,"New York, NY 10026","127 W 112th St Apt 1BNew York, NY 10026",New York,New York County,New York,Manhattan,127,"127 W 112th St #1b, New York, NY 10026, USA",40.800382,-73.953136


In [215]:
# Filter the rows and pick one column in a single .loc call
# (row condition first, column label second).
df.loc[(df['TYPE'] == 'House for sale') & (df['BEDS'] == 3), 'PRICE']

20      639999
57      998800
58      998800
86      585000
88      595000
         ...  
4748    829999
4761    975000
4771    825900
4775    749000
4779    799000
Name: PRICE, Length: 491, dtype: int64

## Change / Reset indices

- `set_index` was introduced in the "Indexing & Accessing Columns" section above; this section shows how to undo it with `reset_index`.

In [227]:
# Fresh copy of the exercise data for the index examples.
df = pd.read_csv('data.csv')
df.head(n=5)

Unnamed: 0,Duration,Date,Pulse,Maxpulse,Calories
0,60,'2020/12/01',110,130,409.1
1,60,'2020/12/02',117,145,479.0
2,60,'2020/12/03',103,135,340.0
3,45,'2020/12/04',109,175,282.4
4,45,'2020/12/05',117,148,406.0


In [228]:
# Make Date the index; the column moves out of the data area into the index.
df = df.set_index('Date')
df.head(n=5)

Unnamed: 0_level_0,Duration,Pulse,Maxpulse,Calories
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
'2020/12/01',60,110,130,409.1
'2020/12/02',60,117,145,479.0
'2020/12/03',60,103,135,340.0
'2020/12/04',45,109,175,282.4
'2020/12/05',45,117,148,406.0


In [229]:
# Reset the index: Date becomes a regular (first) column again and the rows
# get the default 0..n-1 integer index back.
df = df.reset_index()
df.head(n=5)

Unnamed: 0,Date,Duration,Pulse,Maxpulse,Calories
0,'2020/12/01',60,110,130,409.1
1,'2020/12/02',60,117,145,479.0
2,'2020/12/03',60,103,135,340.0
3,'2020/12/04',45,109,175,282.4
4,'2020/12/05',45,117,148,406.0


## Fixing incomplete data

In [233]:
# Small frame with one missing value (np.nan) in column B.
# The NaN forces column B to a float dtype; A and C stay integer.
stuff = {
    'A': [1, 2, 3],
    'B': [4, np.nan, 6],
    'C': [7, 8, 9],
}

df = pd.DataFrame(data=stuff)
df

Unnamed: 0,A,B,C
0,1,4.0,7
1,2,,8
2,3,6.0,9


In [234]:
# Drop every row that contains at least one NaN (axis='index' is the default).
# A new frame is returned; df itself is unchanged.
df.dropna(axis='index')

Unnamed: 0,A,B,C
0,1,4.0,7
2,3,6.0,9


In [236]:
# Drop every column that contains at least one NaN value (axis=1 selects columns).
df.dropna(axis=1)

Unnamed: 0,A,C
0,1,7
1,2,8
2,3,9


In [237]:
# Replace column C so that it holds two missing values; this gives the
# threshold examples that follow something to act on.
df = df.assign(C=[7, np.nan, np.nan])
df

Unnamed: 0,A,B,C
0,1,4.0,7.0
1,2,,
2,3,6.0,


In [238]:
# Set a threshold: keep only the rows that have at least 2 non-NaN values.
df.dropna(thresh=2)

Unnamed: 0,A,B,C
0,1,4.0,7.0
2,3,6.0,


In [239]:
# Same threshold applied column-wise: keep only the columns that have at
# least 2 non-NaN values.
df.dropna(thresh=2,axis = 1)

Unnamed: 0,A,B
0,1,4.0
1,2,
2,3,6.0


In [243]:
# Replace every NaN in the frame with a fixed placeholder value.
df.fillna(value = 'anything')

Unnamed: 0,A,B,C
0,1,4.0,7.0
1,2,anything,anything
2,3,6.0,anything


In [245]:
# Fill with a computed value: the mean of column B is calculated once and
# that single scalar replaces every NaN in the frame (column C included).
# Pass df.mean() instead to fill each column with its own mean.
mean_of_b = df['B'].mean()
df.fillna(value=mean_of_b)

Unnamed: 0,A,B,C
0,1,4.0,7.0
1,2,5.0,5.0
2,3,6.0,5.0


## Group by - Dataframe

In [256]:
# Load the housing listings used for the group-by examples.
df = pd.read_csv('housing_prices.csv')
df.head()

Unnamed: 0,BROKERTITLE,TYPE,PRICE,BEDS,BATH,PROPERTYSQFT,ADDRESS,STATE,MAIN_ADDRESS,ADMINISTRATIVE_AREA_LEVEL_2,LOCALITY,SUBLOCALITY,STREET_NAME,LONG_NAME,FORMATTED_ADDRESS,LATITUDE,LONGITUDE
0,Brokered by Douglas Elliman -111 Fifth Ave,Condo for sale,315000,2,2.0,1400.0,2 E 55th St Unit 803,"New York, NY 10022","2 E 55th St Unit 803New York, NY 10022",New York County,New York,Manhattan,East 55th Street,Regis Residence,"Regis Residence, 2 E 55th St #803, New York, N...",40.761255,-73.974483
1,Brokered by Serhant,Condo for sale,195000000,7,10.0,17545.0,Central Park Tower Penthouse-217 W 57th New Yo...,"New York, NY 10019",Central Park Tower Penthouse-217 W 57th New Yo...,United States,New York,New York County,New York,West 57th Street,"217 W 57th St, New York, NY 10019, USA",40.766393,-73.980991
2,Brokered by Sowae Corp,House for sale,260000,4,2.0,2015.0,620 Sinclair Ave,"Staten Island, NY 10312","620 Sinclair AveStaten Island, NY 10312",United States,New York,Richmond County,Staten Island,Sinclair Avenue,"620 Sinclair Ave, Staten Island, NY 10312, USA",40.541805,-74.196109
3,Brokered by COMPASS,Condo for sale,69000,3,1.0,445.0,2 E 55th St Unit 908W33,"Manhattan, NY 10022","2 E 55th St Unit 908W33Manhattan, NY 10022",United States,New York,New York County,New York,East 55th Street,"2 E 55th St, New York, NY 10022, USA",40.761398,-73.974613
4,Brokered by Sotheby's International Realty - E...,Townhouse for sale,55000000,7,2.373861,14175.0,5 E 64th St,"New York, NY 10065","5 E 64th StNew York, NY 10065",United States,New York,New York County,New York,East 64th Street,"5 E 64th St, New York, NY 10065, USA",40.767224,-73.969856


In [257]:
# Group the listings by broker; the aggregations in the following cells
# all operate on this GroupBy object.
groupedData = df.groupby('BROKERTITLE')

In [253]:
# NOTE: sum() is applied to every column, so the string columns (TYPE,
# ADDRESS, ...) are concatenated per group, as the output shows. Use
# groupedData.sum(numeric_only=True) to total only the numeric columns.
groupedData.sum()

Unnamed: 0_level_0,TYPE,PRICE,BEDS,BATH,PROPERTYSQFT,ADDRESS,STATE,MAIN_ADDRESS,ADMINISTRATIVE_AREA_LEVEL_2,LOCALITY,SUBLOCALITY,STREET_NAME,LONG_NAME,FORMATTED_ADDRESS,LATITUDE,LONGITUDE
BROKERTITLE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Brokered by 1 Oak Real Estate Hub Inc,Multi-family home for sale,1200000,7,2.373861,2184.207862,1278 E 70th St,"Brooklyn, NY 11234","1278 E 70th StBrooklyn, NY 11234",United States,New York,Kings County,Brooklyn,East 70th Street,"1278 E 70th St, Brooklyn, NY 11234, USA",40.624316,-73.913604
Brokered by 5 Boro Realty Corp,Condo for saleLand for saleForeclosureHouse fo...,17875995,86,51.495443,41328.662896,1368 E 92nd St Unit 3001905 Fulton St719 Vermo...,"Brooklyn, NY 11236Bedford Stuyvesant, NY 11233...","1368 E 92nd St Unit 300Brooklyn, NY 112361905 ...",New YorkUnited StatesUnited StatesUnited State...,Kings CountyNew YorkNew YorkNew YorkNew YorkNe...,BrooklynKings CountyKings CountyKings CountyKi...,CanarsieBrooklynBrooklynBrooklynBrooklynBrookl...,1368Fulton StreetVermont StreetFlatlands Avenu...,"1368 E 92nd St #300, Brooklyn, NY 11236, USA19...",690.918463,-1256.772017
Brokered by A J Realty Of Queens Inc,Multi-family home for sale,899000,4,2.373861,2184.207862,109-06 112th St,"South Ozone Park, NY 11420","109-06 112th StSouth Ozone Park, NY 11420",United States,New York,Queens County,Queens,112th Street,"109-06 112th St, Jamaica, NY 11420, USA",40.680320,-73.828179
Brokered by A P Services Inc,Multi-family home for sale,2295000,3,7.000000,2184.207862,4415 Atlantic Ave,"Brooklyn, NY 11224","4415 Atlantic AveBrooklyn, NY 11224",United States,New York,Kings County,Brooklyn,Atlantic Avenue,"4415 Atlantic Ave, Brooklyn, NY 11224, USA",40.575795,-74.009975
Brokered by AAA Young Shuen Realty Inc,House for saleHouse for sale,2576000,10,4.747722,4368.415724,146-06 13th Ave146-06 13th Ave,"Whitestone, NY 11357Whitestone, NY 11357","146-06 13th AveWhitestone, NY 11357146-06 13th...",United StatesUnited States,New YorkNew York,Queens CountyQueens County,QueensQueens,13th Avenue13th Avenue,"146-06 13th Ave, Flushing, NY 11357, USA146-06...",81.577331,-147.641586
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Brokered by eXp Realty BKNY,Multi-family home for saleHouse for saleCondo ...,10421000,36,25.000000,18383.000000,66 Bay 22nd St231 Freeborn St269 Bay 8th St Ap...,"Brooklyn, NY 11214Staten Island, NY 10306Brook...","66 Bay 22nd StBrooklyn, NY 11214231 Freeborn S...",United StatesUnited StatesNew YorkNew YorkUnit...,New YorkNew YorkKings CountyKings CountyNew Yo...,Kings CountyRichmond CountyBrooklynBrooklynKin...,BrooklynStaten IslandBath BeachMidwoodBrooklyn...,Bay 22nd StreetFreeborn Street26912932483Brown...,"66 Bay 22nd St, Brooklyn, NY 11214, USA231 Fre...",568.396777,-1035.763977
Brokered by eXp Realty NYC,ContingentContingentContingentContingent,2984000,12,5.000000,5410.207862,217 E 5th St Apt 81363 Sutter Ave2 Grace Ct Ap...,"New York, NY 10003Brooklyn, NY 11208Brooklyn, ...","217 E 5th St Apt 8New York, NY 100031363 Sutte...",New YorkUnited StatesNew YorkNew York,New York CountyNew YorkKings CountyNew York Co...,New YorkKings CountyBrooklynNew York,ManhattanBrooklynBrooklyn HeightsManhattan,217Sutter Avenue2152,"217 E 5th St Apt 8, New York, NY 10003, USA136...",162.872648,-295.812864
Brokered by qvtopservice,House for saleCo-op for saleCo-op for saleCo-o...,3816999,20,11.000000,14604.415724,102-26 213th St1040 Neilson St Apt 3A1040 Neil...,"Queens Village, NY 11429Far Rockaway, NY 11691...","102-26 213th StQueens Village, NY 114291040 Ne...",United StatesNew YorkNew YorkUnited StatesNew ...,New YorkQueens CountyQueens CountyNew YorkKing...,Queens CountyQueensQueensQueens CountyBrooklyn...,QueensFar RockawayFar RockawayQueensOcean Hill...,213th StreetOct-401040140th Street478114th Dri...,"102-26 213th St, Jamaica, NY 11429, USA10-40 N...",325.429667,-590.256437
Built by Toll Brothers,For saleFor saleFor sale,4970000,5,5.000000,3153.000000,The Rockwell # 3AThe Rockwell # 6EThe Rockwell...,"New York, NY 10025New York, NY 10025New York, ...","The Rockwell # 3ANew York, NY 10025The Rockwel...",United StatesUnited StatesUnited States,New YorkNew YorkNew York,New York CountyNew York CountyNew York County,New YorkNew YorkNew York,West 103rd StreetWest 103rd StreetWest 103rd S...,"218 W 103rd St, New York, NY 10025, USA218 W 1...",122.395974,-221.903711


In [261]:
# Mean listing price per broker.
groupedData['PRICE'].mean()

BROKERTITLE
Brokered by 1 Oak Real Estate Hub Inc     1.200000e+06
Brokered by 5 Boro Realty Corp            1.051529e+06
Brokered by A J Realty Of Queens Inc      8.990000e+05
Brokered by A P Services Inc              2.295000e+06
Brokered by AAA Young Shuen Realty Inc    1.288000e+06
                                              ...     
Brokered by eXp Realty BKNY               7.443571e+05
Brokered by eXp Realty NYC                7.460000e+05
Brokered by qvtopservice                  4.771249e+05
Built by Toll Brothers                    1.656667e+06
NoBroker                                  1.001181e+06
Name: PRICE, Length: 1036, dtype: float64

In [271]:
# Highest listing price per broker.
groupedData['PRICE'].max()

BROKERTITLE
Brokered by 1 Oak Real Estate Hub Inc     1200000
Brokered by 5 Boro Realty Corp            1900000
Brokered by A J Realty Of Queens Inc       899000
Brokered by A P Services Inc              2295000
Brokered by AAA Young Shuen Realty Inc    1288000
                                           ...   
Brokered by eXp Realty BKNY               2199000
Brokered by eXp Realty NYC                1150000
Brokered by qvtopservice                   699000
Built by Toll Brothers                    2280000
NoBroker                                  4800000
Name: PRICE, Length: 1036, dtype: int64

In [272]:
# Standard deviation of the listing price per broker; the result is NaN for
# brokers that have only a single listing.
groupedData['PRICE'].std()

BROKERTITLE
Brokered by 1 Oak Real Estate Hub Inc              NaN
Brokered by 5 Boro Realty Corp            4.394615e+05
Brokered by A J Realty Of Queens Inc               NaN
Brokered by A P Services Inc                       NaN
Brokered by AAA Young Shuen Realty Inc    0.000000e+00
                                              ...     
Brokered by eXp Realty BKNY               5.463518e+05
Brokered by eXp Realty NYC                3.126670e+05
Brokered by qvtopservice                  2.534805e+05
Built by Toll Brothers                    6.689046e+05
NoBroker                                  1.084403e+06
Name: PRICE, Length: 1036, dtype: float64

In [273]:
# Summary statistics (count, mean, std, min, quartiles, max) for each
# numeric column, computed per broker.
groupedData.describe()

Unnamed: 0_level_0,PRICE,PRICE,PRICE,PRICE,PRICE,PRICE,PRICE,PRICE,BEDS,BEDS,...,LATITUDE,LATITUDE,LONGITUDE,LONGITUDE,LONGITUDE,LONGITUDE,LONGITUDE,LONGITUDE,LONGITUDE,LONGITUDE
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
BROKERTITLE,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Brokered by 1 Oak Real Estate Hub Inc,1.0,1.200000e+06,,1200000.0,1200000.0,1200000.0,1200000.0,1200000.0,1.0,7.000000,...,40.624316,40.624316,1.0,-73.913604,,-73.913604,-73.913604,-73.913604,-73.913604,-73.913604
Brokered by 5 Boro Realty Corp,17.0,1.051529e+06,4.394615e+05,99000.0,835000.0,1175000.0,1299999.0,1900000.0,17.0,5.058824,...,40.669698,40.678958,17.0,-73.927766,0.027581,-74.007221,-73.937647,-73.920319,-73.913355,-73.890638
Brokered by A J Realty Of Queens Inc,1.0,8.990000e+05,,899000.0,899000.0,899000.0,899000.0,899000.0,1.0,4.000000,...,40.680320,40.680320,1.0,-73.828179,,-73.828179,-73.828179,-73.828179,-73.828179,-73.828179
Brokered by A P Services Inc,1.0,2.295000e+06,,2295000.0,2295000.0,2295000.0,2295000.0,2295000.0,1.0,3.000000,...,40.575795,40.575795,1.0,-74.009975,,-74.009975,-74.009975,-74.009975,-74.009975,-74.009975
Brokered by AAA Young Shuen Realty Inc,2.0,1.288000e+06,0.000000e+00,1288000.0,1288000.0,1288000.0,1288000.0,1288000.0,2.0,5.000000,...,40.788666,40.788666,2.0,-73.820793,0.000000,-73.820793,-73.820793,-73.820793,-73.820793,-73.820793
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Brokered by eXp Realty BKNY,14.0,7.443571e+05,5.463518e+05,265000.0,403750.0,512000.0,990000.0,2199000.0,14.0,2.571429,...,40.616886,40.632963,14.0,-73.983141,0.037386,-74.093167,-73.983619,-73.975748,-73.967741,-73.937245
Brokered by eXp Realty NYC,4.0,7.460000e+05,3.126670e+05,389000.0,618500.0,722500.0,850000.0,1150000.0,4.0,3.000000,...,40.739923,40.777093,4.0,-73.953216,0.059976,-73.998656,-73.992292,-73.973361,-73.934285,-73.867485
Brokered by qvtopservice,8.0,4.771249e+05,2.534805e+05,100000.0,266250.0,624499.5,655500.0,699000.0,8.0,2.500000,...,40.702705,40.731836,8.0,-73.782055,0.061141,-73.917831,-73.780885,-73.752709,-73.747759,-73.745280
Built by Toll Brothers,3.0,1.656667e+06,6.689046e+05,950000.0,1345000.0,1740000.0,2010000.0,2280000.0,3.0,1.666667,...,40.798658,40.798658,3.0,-73.967904,0.000000,-73.967904,-73.967904,-73.967904,-73.967904,-73.967904


## Applying functions on dataframes

In [274]:
# Reload the exercise data for the apply() examples.
df = pd.read_csv('data.csv')
df.head()

Unnamed: 0,Duration,Date,Pulse,Maxpulse,Calories
0,60,'2020/12/01',110,130,409.1
1,60,'2020/12/02',117,145,479.0
2,60,'2020/12/03',103,135,340.0
3,45,'2020/12/04',109,175,282.4
4,45,'2020/12/05',117,148,406.0


In [275]:
def sectomin(x):
    """Convert a value from seconds to minutes by dividing it by 60.

    Accepts a plain number or any object that supports true division
    and returns the quotient.
    """
    seconds_per_minute = 60
    return x / seconds_per_minute

In [276]:
# Apply sectomin to each Duration value. The result is a new Series; it is
# not assigned back, so df is not modified.
df['Duration'].apply(sectomin)

0     1.00
1     1.00
2     1.00
3     0.75
4     0.75
5     1.00
6     1.00
7     7.50
8     0.50
9     1.00
10    1.00
11    1.00
12    1.00
13    1.00
14    1.00
15    1.00
16    1.00
17    1.00
18    0.75
19    1.00
20    0.75
21    1.00
22    0.75
23    1.00
24    0.75
25    1.00
26    1.00
27    1.00
28    1.00
29    1.00
30    1.00
31    1.00
Name: Duration, dtype: float64

In [277]:
# Confirm that the Duration column in df still holds its original values.
df.head()

Unnamed: 0,Duration,Date,Pulse,Maxpulse,Calories
0,60,'2020/12/01',110,130,409.1
1,60,'2020/12/02',117,145,479.0
2,60,'2020/12/03',103,135,340.0
3,45,'2020/12/04',109,175,282.4
4,45,'2020/12/05',117,148,406.0


In [278]:
# The same divide-by-60 conversion as sectomin, written inline as an
# anonymous function instead of a named one.
df['Duration'].apply(lambda duration: duration / 60)

0     1.00
1     1.00
2     1.00
3     0.75
4     0.75
5     1.00
6     1.00
7     7.50
8     0.50
9     1.00
10    1.00
11    1.00
12    1.00
13    1.00
14    1.00
15    1.00
16    1.00
17    1.00
18    0.75
19    1.00
20    0.75
21    1.00
22    0.75
23    1.00
24    0.75
25    1.00
26    1.00
27    1.00
28    1.00
29    1.00
30    1.00
31    1.00
Name: Duration, dtype: float64

## Sorting & ordering data

In [284]:
# Reload the exercise data in its original row order before sorting.
df = pd.read_csv('data.csv')
df.head()

Unnamed: 0,Duration,Date,Pulse,Maxpulse,Calories
0,60,'2020/12/01',110,130,409.1
1,60,'2020/12/02',117,145,479.0
2,60,'2020/12/03',103,135,340.0
3,45,'2020/12/04',109,175,282.4
4,45,'2020/12/05',117,148,406.0


In [287]:
# Sort the rows by the Calories column, largest value first. The original
# row labels are kept, which is why the index in the output is out of order.
# The sorted frame is reassigned instead of sorting with inplace=True.
df = df.sort_values("Calories", ascending=False)
df.head()

Unnamed: 0,Duration,Date,Pulse,Maxpulse,Calories
1,60,'2020/12/02',117,145,479.0
0,60,'2020/12/01',110,130,409.1
4,45,'2020/12/05',117,148,406.0
30,60,'2020/12/30',102,129,380.3
14,60,'2020/12/14',104,132,379.3
