## Calculating Single Point Haversine, distance from 0,0
 1.	Import libraries & set directory paths
 2.	Import Portal data, check dimensions and data types
 3.	Create ‘Single Point Haversine’ to represent Latitude and Longitude as one value.
    -	Import new library: from math import radians, cos, sin, asin, sqrt
    -	User defined function to convert Lat. & long. to Single Point Haversine.
    -	Apply conversion
 4.	Export to: stone_signs_portal_v1.2_haversine - 29,255.csv

### import libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
import os

### set original data set directory path

In [2]:
dataset = r'D:\My Documents\! Omnicompetent Ltd\Career Foundry - Data Analytics\Data Analytics Course\60 Advanced Analytics and Dashboard Design\Stone Signs Portal Analysis\02 Data'
dataset

'D:\\My Documents\\! Omnicompetent Ltd\\Career Foundry - Data Analytics\\Data Analytics Course\\60 Advanced Analytics and Dashboard Design\\Stone Signs Portal Analysis\\02 Data'

### import data set

In [3]:
df_portal = pd.read_csv(os.path.join(dataset,'stone_signs_portal_v1.1 - 29,255.csv'))
df_portal.head()

Unnamed: 0,request_id,request_datetime,completed_datetime,completed_duration_days,completed_duration_hours,agent_name,agent_code,branch_name,branch_code,sign_category,...,quantity,additional_info,request_status,payment_status,net_charge,vat,gross_charge,invoice_datetime,paid_datetime,payment_duration_days
0,332,02/01/2020 09:18,02/01/2020 15:55,0.28,6.6,Hyman Hill,HYH,HymanHill @Shoreham-by-Sea,530,Sales,...,1,two boards please as on corner plot with one b...,Completed,Paid,3.85,0.77,4.62,01/02/2020 15:46,26/02/2020 19:13,25.1
1,334,02/01/2020 10:32,02/01/2020 14:47,0.18,4.3,Michael Jones,MCJ,MichaelJones @LancingSales(&Bacon),502,Sales,...,1,,Completed,Paid,3.8,0.76,4.56,01/02/2020 17:02,29/02/2020 10:27,27.7
2,335,02/01/2020 10:40,04/01/2020 12:20,2.07,49.7,zz Symonds & Reading,ZZ4,zz Symonds&Reading @Ferring,350,Sales,...,1,,Completed,Paid,3.8,0.76,4.56,01/02/2020 17:13,26/02/2020 19:12,25.1
3,336,02/01/2020 10:58,02/01/2020 14:42,0.16,3.7,Michael Jones,MCJ,MichaelJones @LancingSales(&Bacon),502,Sales,...,1,,Completed,Paid,3.8,0.76,4.56,01/02/2020 17:02,29/02/2020 10:27,27.7
4,337,02/01/2020 11:10,03/01/2020 16:10,1.21,29.0,Rowland Gorringe,ROW,RowlandGorringe @Seaford,950,Sales,...,1,,Completed,Paid,3.8,0.76,4.56,01/02/2020 17:06,29/02/2020 10:27,27.7


### check dimensions & data types

In [4]:
df_portal.shape

(29255, 30)

In [5]:
df_portal.dtypes

request_id                    int64
request_datetime             object
completed_datetime           object
completed_duration_days     float64
completed_duration_hours    float64
agent_name                   object
agent_code                   object
branch_name                  object
branch_code                   int64
sign_category                object
sign_action                  object
board_type                   object
sign_type                    object
address_1                    object
address_2                    object
address_3                    object
town                         object
postcode                     object
latitude                    float64
longitude                   float64
quantity                      int64
additional_info              object
request_status               object
payment_status               object
net_charge                  float64
vat                         float64
gross_charge                float64
invoice_datetime            

### convert latitude and longitude into a single_point_haversine

### create subset for latitude and longitude only

In [6]:
df_lat_long = df_portal[['latitude','longitude']]
df_lat_long

Unnamed: 0,latitude,longitude
0,50.842528,-0.254277
1,50.825436,-0.319448
2,50.808270,-0.447493
3,50.833847,-0.314048
4,50.778295,0.097399
...,...,...
29250,50.822110,-0.317679
29251,50.799978,-0.004562
29252,50.833551,-0.128801
29253,50.840330,-0.272710


### convert latitude and longitude to a single value
   based on info sourced from internet

In [7]:
from math import radians, cos, sin, asin, sqrt

### create user defined function "single_pt_haversine"

In [8]:
def single_pt_haversine(lat, lng, degrees=True):
    #'Single-point' Haversine: Calculates the great circle distance between a point on Earth and the (0, 0) lat-long coordinate

    #r = 6371 # Earth's radius km
    r = 3956  # Earth's radius in miles

    # Convert decimal degrees to radians
    if degrees:
        lat, lng = map(radians, [lat, lng])

    # 'Single-point' Haversine formula
    a = sin(lat/2)**2 + cos(lat) * sin(lng/2)**2
    d = 2 * r * asin(sqrt(a)) 

    return d

### make conversion

In [9]:
df_lat_long['haversine_distance_miles'] = [single_pt_haversine(latitude, longitude) for latitude, longitude in zip(df_lat_long.latitude, df_lat_long.longitude)]
df_lat_long.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_lat_long['haversine_distance_miles'] = [single_pt_haversine(latitude, longitude) for latitude, longitude in zip(df_lat_long.latitude, df_lat_long.longitude)]


Unnamed: 0,latitude,longitude,haversine_distance_miles
0,50.842528,-0.254277,3510.465548
1,50.825436,-0.319448,3509.303756
2,50.80827,-0.447493,3508.166825
3,50.833847,-0.314048,3509.882854
4,50.778295,0.097399,3506.003486


### try on full dataframe

In [10]:
df_portal['haversine_distance_miles'] = [single_pt_haversine(latitude, longitude) for latitude, longitude in zip(df_portal.latitude, df_portal.longitude)]
df_portal[['latitude','longitude','haversine_distance_miles']].head()

Unnamed: 0,latitude,longitude,haversine_distance_miles
0,50.842528,-0.254277,3510.465548
1,50.825436,-0.319448,3509.303756
2,50.80827,-0.447493,3508.166825
3,50.833847,-0.314048,3509.882854
4,50.778295,0.097399,3506.003486


### export to CSV

In [11]:
df_portal.shape

(29255, 31)

In [12]:
df_portal.to_csv(os.path.join(dataset,'stone_signs_portal_v1.2_haversine - 29,255.csv'), index=False)