In [29]:
import pandas as pd
import numpy as np
from textblob import TextBlob
from sklearn.metrics import classification_report

import folium
from geopy.geocoders import Nominatim
import geopy.distance
from geopy.distance import great_circle as GRC
from geopy.distance import geodesic as GD

import os
import geopandas
import requests
import pyproj

# Sentiment analysis

In [6]:
def get_polarity_and_subjectivity(s):
    pol = TextBlob(s).sentiment.polarity
    sub = TextBlob(s).sentiment.subjectivity
    print(f"Polarity: {pol}\nSubjectivity: {sub}")

In [16]:
## if polarity < 0 --> negative else positive

In [7]:
s = "it was hours ago i came in.... and it`s only now i realised when i went to buy something online"
get_polarity_and_subjectivity(s)

Polarity: 0.0
Subjectivity: 1.0


In [9]:
s = "as much as i love to be hopeful, i reckon the chances are minimal =P i`m never gonna get my cake and stuff"
get_polarity_and_subjectivity(s)

Polarity: 0.2
Subjectivity: 0.6


In [10]:
s = " you know you want to come keep me company whilst mum`s at her friends for the night it`s such a nice evening!"
get_polarity_and_subjectivity(s)

Polarity: 0.375
Subjectivity: 0.75


# Geospatial data

Geospatial Analysis:

The objective of this tutorial is to use geospatial analysis to make decisions about opening up new restaurants(or retail stores, bank branches, airports, etc). For our analysis, we use a dataset consisting of Starbucks and McDonalds store coordinates in New York.

Our tutorial consists of the following steps:


1.   Compute the distance of the stores
2.   Visualize data on a map
3.   Compute number of stores located in a given radius
4.   Compute the closest and farthest stores from Empire State Building



In [31]:
geolocator = Nominatim(user_agent="geoapiExercises")
ladd1 = "Jamshedpur"
print("Location address:",ladd1)
l1 = geolocator.geocode(ladd1)
print("Latitude and Longitude of the said address:")
print((l1.latitude, l1.longitude))

ladd2 = "Kamarhati"
print("\nLocation address:",ladd2)
location = geolocator.geocode(ladd2)
print("Latitude and Longitude of the said address:")
print((location.latitude, location.longitude))

geopy.distance.distance((l1.latitude, l1.longitude), (location.latitude, location.longitude)).km


Location address: Jamshedpur
Latitude and Longitude of the said address:
(22.8015194, 86.2029579)

Location address: Kamarhati
Latitude and Longitude of the said address:
(22.6810246, 88.3715343)


223.14669071130305

In [32]:
#Read file containing location details
df = pd.read_csv('https://drive.google.com/uc?id=19JERBe0ndadkMPKDFZ0rTEZqdI50zDfk')
df.head(3)

Unnamed: 0,lat,lng,store,address
0,40.711,-74.000878,Starbucks,"375 Pearl st, New York, NY 10038"
1,40.707142,-74.004987,Starbucks,"135 John Street, New York, NY 10038"
2,40.708411,-74.007404,Starbucks,"100 William Street, New York, NY 10038"


In [33]:
#Merge Columns
df['Coordinate'] = '('+df['lat'].astype(str) + ', ' + df['lng'].astype(str)+')'
df.head(3)

Unnamed: 0,lat,lng,store,address,Coordinate
0,40.711,-74.000878,Starbucks,"375 Pearl st, New York, NY 10038","(40.711, -74.000878)"
1,40.707142,-74.004987,Starbucks,"135 John Street, New York, NY 10038","(40.707142, -74.004987)"
2,40.708411,-74.007404,Starbucks,"100 William Street, New York, NY 10038","(40.708411, -74.007404)"


In [39]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 288 entries, 0 to 287
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   lat         288 non-null    float64
 1   lng         288 non-null    float64
 2   store       288 non-null    object 
 3   address     288 non-null    object 
 4   Coordinate  288 non-null    object 
dtypes: float64(2), object(3)
memory usage: 11.4+ KB


In [41]:
df["Coordinate"] = df["Coordinate"].astype(tuple)

TypeError: dtype '<class 'tuple'>' not understood

In [34]:
#New York Empire State Building Lat Long (Looking for any approx center point)
NY_coord = (40.748488, -73.985238)

In [35]:
ass=df.itertuples
type(ass)

method

In [36]:
df.columns

Index(['lat', 'lng', 'store', 'address', 'Coordinate'], dtype='object')

**1. Compute the distance of the stores**

In [40]:
#Compute distance of every store from city center
distances_km = []

for row in df.itertuples(index=False):
    distances_km.append(
       geopy.distance.distance(NY_coord, row.Coordinate.astype(tuple)).km
   )

df['Distance'] = distances_km
df.head(10)

AttributeError: 'str' object has no attribute 'astype'

In [None]:
for i, row in df.iterrows():
  lat = df.at[i, 'lat']
  print(lat)
  lng = df.at[i, 'lng']
  print(lng)
  store = df.at[i, 'store']
  print(store)
  print(i)
  print()
  print(type(row))
  print()
  print()
  print(row)
  
  break


40.711
-74.000878
Starbucks
0

<class 'pandas.core.series.Series'>


lat                                     40.711
lng                                 -74.000878
store                                Starbucks
address       375 Pearl st, New York, NY 10038
Coordinate                (40.711, -74.000878)
Distance                              4.367628
Name: 0, dtype: object


In [None]:
nn = folium.Map(location=NY_coord, zoom_start= 15)
type(nn)

folium.folium.Map

In [None]:
j=folium.Marker(location=[lat,lng], popup=store, icon= folium.Icon(color=color)).add_to(nn)
type(j)

NameError: ignored

**2.Visualize data on a map**

In [None]:
#Empire State Building coordinates
m = folium.Map(location=NY_coord, zoom_start= 15)

#Place markers for the stores on the map
for i, row in df.iterrows():
  lat = df.at[i, 'lat']
  lng = df.at[i, 'lng']
  store = df.at[i, 'store']

  if store == 'McDonalds':
    color = 'red'
  else:
    color = 'green'

  folium.Marker(location=[lat,lng], popup=store, icon= folium.Icon(color=color)).add_to(m)

m

**3. Compute number of stores located in a given radius**

In [None]:
#All stores at a distance greater/less than x kms
df[df['Distance'] > 10]

**4. Compute the closest and farthest stores from Empire State Building**



In [None]:
df_farthest = df.iloc[df.groupby('store')['Distance'].agg(pd.Series.idxmax)]
df_closest = df.iloc[df.groupby('store')['Distance'].agg(pd.Series.idxmin)]
df_new = pd.concat([df_farthest, df_closest])

In [None]:
ds=df.groupby('store')['Distance'].agg(pd.Series.idxmax)
ds.index[:]

In [None]:
df_new

In [None]:
#Empire State Building coordinates
m = folium.Map(location=[40.748488, -73.985238], zoom_start= 10)

for i, rown in df_new.iterrows():
  lat = df_new.at[i, 'lat']
  lng = df_new.at[i, 'lng']
  store = df_new.at[i, 'store']

  if store == 'McDonalds':
    color = 'blue'
  else:
    color = 'green'

  folium.Marker(location=[lat,lng], popup=store, icon= folium.Icon(color=color)).add_to(m)

m

# Coffee king

In [None]:
!pip install geopandas

In [None]:


def mcdonalds():
    data = requests.get("https://www.mcdonalds.com/googleappsv2/geolocation?latitude=40.7830603&longitude=-73.9712488&radius=8.045&maxResults=30&country=us&language=en-us").json()
    
    result = []
    for row in data['features']:
        geom,prop = row['geometry'], row['properties']
        result.append({
            'store': 'McDonalds',
            'lat': geom['coordinates'][1],
            'lng': geom['coordinates'][0],
            'address': prop['addressLine1'],
        })

    return pd.DataFrame(result)


def starbucks():

    
    #Return a DataFrame with Manhattan Starbucks (store, address, lat, lng)I went to https://www.starbucks.com/store-locator and moved the map around to get center points that seem to cover Manhattan.
    
    
    map_points = (
      [40.709353, -74.002588],
      [40.72899, -73.982323],
      [40.741837, -74.008072],
      [40.741317, -73.972179],
      [40.761994, -73.959133],
      [40.763489, -73.989517],
      [40.763814, -74.01415],
      [40.781312, -73.98617],
      [40.773145, -73.960995],
      [40.79807, -73.977686],
      [40.786317, -73.928566],
      
    )

    result = []

    for lat, lng in map_points:
        url = f'https://www.starbucks.com/bff/locations?lat={lat}&lng={lng}'
        
        data = requests.get(url, headers={'x-requested-with': 'XMLHttpRequest'}).json()


        for row in data['stores']:
            result.append({
              'store': 'Starbucks',
              'lat': row['coordinates']['latitude'],
              'lng': row['coordinates']['longitude'],
              'address': ', '.join(row['addressLines'])
            })
    return pd.DataFrame(result).drop_duplicates()

def scrape_stores(target, overwrite=False):
    if not overwrite and os.path.exists(target):
        print(target, 'already exists. not scraping')
        return pd.read_excel(target)
    
    data = pd.concat([
        mcdonalds(),
        starbucks(),
    ])

    data.to_excel(target, index=False)
    return data

def manhatten_map():
    #load population and census shape file and adata 
    cols  =  ['BoroName','FIPSCode','BoroCode','CT2010','Pop2000','Pop2010','Change','ChangePC','Acres','Density']

    pop = pd.read_excel('/content/t_pl_p5_ct.xlsx', skiprows=range(7), header=None, names=cols, dtype=object)

    shape  = geopandas.read_file('/content/nyct2010.shp')

    # Filter for Manhattan 
    shape  =  shape.merge(pop.loc[:, ['CT2010', 'Pop2010']], on='CT2010')

    if not os.path.exists('map'):
        os.makedirs('map')

        # convert lat-long coordinate system and save 
    #shape  = CRS('EPSG:4326').to_proj4()
    crs=pyproj.CRS("epsg:4326")
    shape = shape.to_crs(crs)
    shape['Pop2010'] = shape['Pop2010'].astype(int)
    shape.to_file('map/population.shp')
    # save as a single blog for use by reshaper 

    shape.dissolve(by='BoroName')[['BoroCode', 'geometry']].to_file('map/single.shp')

def stores_map(stores, shape):
    geom = geopandas.points_from_xy(stores.lng,stores.lat)
    points = geopandas.GeoDataFrame(stores, geometry=geom)
    #points = points[points.intersects(shape.unary_union)]
    
    points.loc[:,['lat', 'lng','store','address']].to_csv('manhatton-stores.csv', index=False, encoding='utf8')
    points.to_file('map/points.shp')

if __name__ == '__main__':
    stores = scrape_stores('stores.xlsx', overwrite=False)
    shape = manhatten_map()
    stores_map(stores, shape)

In [None]:
from google.colab import drive
drive.mount('/content/drive')

# Assignment

## Q. 3,4,5]

In [14]:
s = "i`ve been sick for the past few days and thus, my hair looks wierd. if i didnt have a hat on it would look... http://tinyurl.com/mnf4kw"
get_polarity_and_subjectivity(s)

Polarity: -0.3880952380952381
Subjectivity: 0.40238095238095245


In [15]:
s = " traumatizing moment of my childhood. my dogs massacred baby bunnies and my brother and i got out the bb gun..."
get_polarity_and_subjectivity(s)

Polarity: 0.0
Subjectivity: 0.0


## Q. 6]

In [17]:
from geopy.geocoders import Nominatim
locator = Nominatim(user_agent="myGeocoder")

In [23]:
location = locator.geocode("Panipat")
paniput = ( location.latitude , location.longitude) 
paniput

(29.3912753, 76.9771675)

In [24]:
location = locator.geocode("Durgapur")
durgapur = ( location.latitude , location.longitude) 
durgapur

(23.5350475, 87.3380425)

In [26]:
GRC(paniput, durgapur).km

1218.8356860888455

In [27]:
GD(paniput, durgapur).km

1219.1448225210377

In [30]:
geopy.distance.distance(paniput, durgapur).km

1219.1448225210377

# END