## import Pandas library

In [101]:
import pandas as pd

## Creating, Reading, Writing

In [102]:
# Dataframe - is table

df = pd.DataFrame({'openPrice':[34495],'lastPrice':[34691]}, index=['29th July'])
print(df)

# convert to CSV
df.to_csv('stockprice.csv')

stockprice = pd.read_csv('stockprice.csv', index_col=0)
stockprice

           openPrice  lastPrice
29th July      34495      34691


Unnamed: 0,openPrice,lastPrice
29th July,34495,34691


In [103]:
# Series - list
series = pd.Series(['hi','I am bob','i am introvert','very intriguing person'])
series

0                        hi
1                  I am bob
2            i am introvert
3    very intriguing person
dtype: object

In [104]:
# Reading Data
data = pd.read_csv('housing.csv')
print(data.head())
print(type(data))

   longitude  latitude  housing_median_age  total_rooms  total_bedrooms  \
0    -122.23     37.88                41.0        880.0           129.0   
1    -122.22     37.86                21.0       7099.0          1106.0   
2    -122.24     37.85                52.0       1467.0           190.0   
3    -122.25     37.85                52.0       1274.0           235.0   
4    -122.25     37.85                52.0       1627.0           280.0   

   population  households  median_income  median_house_value ocean_proximity  
0       322.0       126.0         8.3252            452600.0        NEAR BAY  
1      2401.0      1138.0         8.3014            358500.0        NEAR BAY  
2       496.0       177.0         7.2574            352100.0        NEAR BAY  
3       558.0       219.0         5.6431            341300.0        NEAR BAY  
4       565.0       259.0         3.8462            342200.0        NEAR BAY  
<class 'pandas.core.frame.DataFrame'>


In [105]:
longitude_series = pd.Series(data['longitude']);print(longitude_series)

0       -122.23
1       -122.22
2       -122.24
3       -122.25
4       -122.25
          ...  
20635   -121.09
20636   -121.21
20637   -121.22
20638   -121.32
20639   -121.24
Name: longitude, Length: 20640, dtype: float64


## Indexing, Selecting, Assigning

In [106]:
# Indexing and selecting a key
print(data.latitude)
print(data.latitude[0])

0        37.88
1        37.86
2        37.85
3        37.85
4        37.85
         ...  
20635    39.48
20636    39.49
20637    39.43
20638    39.43
20639    39.37
Name: latitude, Length: 20640, dtype: float64
37.88


In [107]:
# iloc - index based selection
# loc  - label based selection
data.iloc[0]

longitude              -122.23
latitude                 37.88
housing_median_age        41.0
total_rooms              880.0
total_bedrooms           129.0
population               322.0
households               126.0
median_income           8.3252
median_house_value    452600.0
ocean_proximity       NEAR BAY
Name: 0, dtype: object

In [108]:
data.iloc[:,1]

0        37.88
1        37.86
2        37.85
3        37.85
4        37.85
         ...  
20635    39.48
20636    39.49
20637    39.43
20638    39.43
20639    39.37
Name: latitude, Length: 20640, dtype: float64

In [109]:
data.iloc[:3,0]

0   -122.23
1   -122.22
2   -122.24
Name: longitude, dtype: float64

In [110]:
data.iloc[[0,1], -1]

0    NEAR BAY
1    NEAR BAY
Name: ocean_proximity, dtype: object

In [111]:
data.loc[0,'latitude']

37.88

In [112]:
data.loc[:, ['longitude','latitude']]

Unnamed: 0,longitude,latitude
0,-122.23,37.88
1,-122.22,37.86
2,-122.24,37.85
3,-122.25,37.85
4,-122.25,37.85
...,...,...
20635,-121.09,39.48
20636,-121.21,39.49
20637,-121.22,39.43
20638,-121.32,39.43


In [113]:
"""iloc uses the Python stdlib indexing scheme, where the first element of the range is included and the 
last one excluded. So 0:10 will select entries 0,...,9. loc, meanwhile, indexes inclusively. 
So 0:10 will select entries 0,...,10."""

# loc  - is used for the labeling indexing
# iloc - is used for the indexing (trough numerical list manulpulation)
print(data.loc[:3, ['longitude','latitude']])
print()
print(data.iloc[:3,0])

   longitude  latitude
0    -122.23     37.88
1    -122.22     37.86
2    -122.24     37.85
3    -122.25     37.85

0   -122.23
1   -122.22
2   -122.24
Name: longitude, dtype: float64


## Condition Selection

In [114]:
data.longitude == -122.23
print(data.loc[data.housing_median_age==52.0])
print(data.loc[data.ocean_proximity.isin(['NEAR BAY'])])
data.loc[data.longitude.notnull()]

       longitude  latitude  housing_median_age  total_rooms  total_bedrooms  \
2        -122.24     37.85                52.0       1467.0           190.0   
3        -122.25     37.85                52.0       1274.0           235.0   
4        -122.25     37.85                52.0       1627.0           280.0   
5        -122.25     37.85                52.0        919.0           213.0   
6        -122.25     37.84                52.0       2535.0           489.0   
...          ...       ...                 ...          ...             ...   
20142    -119.06     34.36                52.0       1239.0           320.0   
20220    -119.27     34.28                52.0       2239.0           420.0   
20236    -119.27     34.27                52.0        459.0           112.0   
20237    -119.27     34.27                52.0       1577.0           343.0   
20592    -121.58     39.14                52.0        662.0           160.0   

       population  households  median_income  media

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY
...,...,...,...,...,...,...,...,...,...,...
20635,-121.09,39.48,25.0,1665.0,374.0,845.0,330.0,1.5603,78100.0,INLAND
20636,-121.21,39.49,18.0,697.0,150.0,356.0,114.0,2.5568,77100.0,INLAND
20637,-121.22,39.43,17.0,2254.0,485.0,1007.0,433.0,1.7000,92300.0,INLAND
20638,-121.32,39.43,18.0,1860.0,409.0,741.0,349.0,1.8672,84700.0,INLAND


## Assigning

In [115]:
stockprice['lastPrice'] = 34691.50
stockprice

Unnamed: 0,openPrice,lastPrice
29th July,34495,34691.5


## SUMMARY FUNCTIONS AND MAP

In [116]:
data.describe()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
count,20640.0,20640.0,20640.0,20640.0,20433.0,20640.0,20640.0,20640.0,20640.0
mean,-119.569704,35.631861,28.639486,2635.763081,537.870553,1425.476744,499.53968,3.870671,206855.816909
std,2.003532,2.135952,12.585558,2181.615252,421.38507,1132.462122,382.329753,1.899822,115395.615874
min,-124.35,32.54,1.0,2.0,1.0,3.0,1.0,0.4999,14999.0
25%,-121.8,33.93,18.0,1447.75,296.0,787.0,280.0,2.5634,119600.0
50%,-118.49,34.26,29.0,2127.0,435.0,1166.0,409.0,3.5348,179700.0
75%,-118.01,37.71,37.0,3148.0,647.0,1725.0,605.0,4.74325,264725.0
max,-114.31,41.95,52.0,39320.0,6445.0,35682.0,6082.0,15.0001,500001.0


In [117]:
data.longitude.describe()

count    20640.000000
mean      -119.569704
std          2.003532
min       -124.350000
25%       -121.800000
50%       -118.490000
75%       -118.010000
max       -114.310000
Name: longitude, dtype: float64

In [118]:
# unique values
data.ocean_proximity.unique()

array(['NEAR BAY', '<1H OCEAN', 'INLAND', 'NEAR OCEAN', 'ISLAND'],
      dtype=object)

In [119]:
data.longitude.mean()

-119.56970445736432

In [120]:
data.latitude.value_counts()

34.06    244
34.05    236
34.08    234
34.07    231
34.04    221
        ... 
41.63      1
40.74      1
40.91      1
39.69      1
41.28      1
Name: latitude, Length: 862, dtype: int64