In [128]:
import hvplot.pandas
import time
from scipy.stats import linregress
from matplotlib import pyplot as plt

import pandas as pd
import plotly.express as px
import requests
import json
import numpy as np

# Turn off warning messages
import warnings
warnings.filterwarnings("ignore")

# Import the API key
from api_key import geoapify_key

In [60]:
# Load the fraud test transactions CSV into a DataFrame.
fraud_csv_path = "../Resources/fraud_test.csv"
clean_fraud_df = pd.read_csv(fraud_csv_path)

In [150]:
# Summarise fraudulent transactions per state: the number of distinct fraud
# transaction ids and each state's share (in percent) of all fraud transactions.

# .copy() gives fraud_df its own data. Without it, fraud_df is a filtered slice
# of clean_fraud_df and any later column assignment on it triggers pandas'
# SettingWithCopyWarning (which the notebook's warning filter would hide).
fraud_df = clean_fraud_df[clean_fraud_df["is_fraud"] == 1].copy()
total_fraud = fraud_df["trans_num"].nunique()

# One row per state holding the count of unique transaction ids.
state_fraud = fraud_df.groupby("state")["trans_num"].nunique().reset_index()

# Share of all fraud transactions, rounded to two decimals.
state_fraud["percent"] = (state_fraud["trans_num"] / total_fraud * 100).round(2)

In [151]:
# Make sure every state present in the full data set also appears in
# state_fraud, so states without any fraudulent transaction are listed with a
# count and percentage of zero.
states = clean_fraud_df["state"].unique()

known_states = set(state_fraud["state"])
missing_states = [state for state in states if state not in known_states]

# Append all zero rows in one step. This keeps the index contiguous and keeps
# trans_num an integer column; adding rows one cell at a time through .loc
# depends on len() changing between statements and upcasts the counts to float.
if missing_states:
    zero_rows = pd.DataFrame({"state": missing_states, "trans_num": 0, "percent": 0.0})
    state_fraud = pd.concat([state_fraud, zero_rows], ignore_index=True)

In [156]:
# Choropleth of the number of fraudulent transactions per US state, with each
# state's label drawn on top of it.
# The figure is bound to `fraud_map` rather than `map` so that the built-in
# map() stays usable in the rest of the notebook.
# NOTE(review): locationmode="USA-states" expects two-letter state
# abbreviations - confirm the "state" column uses that format.
fraud_map = px.choropleth(
    state_fraud,
    locations="state",
    locationmode="USA-states",
    color="trans_num",
    scope="usa",
    color_continuous_scale="purples",
    labels={"trans_num": "Number of <br>Transactions"},
    title="<b>Fraud Transactions per State</b>",
    hover_data=["percent"],
    width=800,
    height=450,
)

# Text-only overlay showing the state label; it is excluded from hover so the
# choropleth's own hover box is the only one shown.
fraud_map.add_scattergeo(
    locations=state_fraud["state"],
    locationmode="USA-states",
    text=state_fraud["state"],
    mode="text",
    hoverinfo="skip",
    textfont_size=10,
)

# Centre the title and trim the margins around the map.
fraud_map.update_layout(
    title_font_size=20,
    title_x=0.5,
    margin=dict(l=0, r=0, b=10, t=60, autoexpand=True),
)
fraud_map.show()

In [64]:
# Reverse-geocode the merchant coordinates of every fraudulent transaction with
# the Geoapify API, store the merchant's country and state next to the
# transaction, and save the enriched data to CSV.
# NOTE: one HTTP request is sent per row, so this cell is slow to run.

# Request settings do not change between rows, so they are defined once.
base_url = "https://api.geoapify.com/v1/geocode/reverse?"
request_timeout = 30  # seconds; without a timeout a stalled connection blocks the loop forever

merch_countries = []
merch_states = []

for Lat, Lon in zip(fraud_df["merch_lat"], fraud_df["merch_long"]):

    # Query parameters: API key plus the merchant's latitude and longitude
    params = {"apiKey": geoapify_key,
              "lat": Lat,
              "lon": Lon
    }

    # Make an API request using the params dictionary
    merch_info = requests.get(base_url, params=params, timeout=request_timeout).json()

    # Use the first feature of the response; an empty or malformed response
    # leaves no properties to read.
    try:
        properties = merch_info["features"][0]["properties"]
    except (KeyError, IndexError):
        properties = {}

    # Each field falls back to its own placeholder, so a result that has a
    # country but no state still keeps the country.
    merch_countries.append(properties.get("country", "No Country found"))
    merch_states.append(properties.get("state", "No State found"))

# Attach both columns in one step. assign() returns a new DataFrame, so nothing
# is written into a filtered slice and the frame is not modified while it is
# being iterated.
fraud_df = fraud_df.assign(merch_country=merch_countries, merch_state=merch_states)

# The DataFrame index is written as the first (unnamed) CSV column.
fraud_df.to_csv("../Resources/fraud_test_merchant.csv")


In [162]:
# Load the fraud transactions that were saved with merchant country and state.
merchant_csv_path = "../Resources/fraud_test_merchant.csv"
fraud_merchant_df = pd.read_csv(merchant_csv_path)

In [None]:
# FIXME: this cell was left unfinished - the `if` had no colon and no body,
# which is a SyntaxError. It is completed with a placeholder so the cell is
# valid Python; what should happen for matching rows is still undecided.
# NOTE(review): the geocoding cell stores the merchant state as "merch_state";
# confirm that a "State" column really exists, otherwise row["State"] raises
# KeyError.
for index, row in fraud_merchant_df.iterrows():
    if row["state"] == row["State"]:
        pass  # TODO: handle rows whose two state values match

In [159]:
# Display the (row count, column count) of the reloaded merchant data.
fraud_merchant_df.shape

(2145, 26)

In [163]:
# Per merchant state: number of distinct fraud transaction ids and the median
# transaction amount, with merch_state turned back into a regular column.
state_merch_fraud = (
    fraud_merchant_df
    .groupby("merch_state")
    .agg(
        trans_num=("trans_num", "nunique"),
        amt=("amt", "median"),
    )
    .reset_index()
)

In [164]:
# Display the total of the per-state transaction counts.
state_merch_fraud["trans_num"].sum()

2145

In [145]:
# Scatter plot per merchant state: number of fraudulent transactions (x)
# against the median transaction amount (y), one point per state.
fig = px.scatter(
    state_merch_fraud,
    x="trans_num",
    y="amt",
    color="merch_state",
    text="merch_state",  # gives update_traces(textposition=...) below a label to place
    hover_name="merch_state",
    labels={
        "trans_num": "Number of Fraud Transactions",
        "amt": "Median Amount",
        "merch_state": "Merchant State",
    },
    title="<b>Fraud Transactions vs. Median Amount per Merchant State</b>",
)

# Position the state name on top of the data point
fig.update_traces(textposition="top center")

# Plotly renders the figure itself; no matplotlib figure is created here, so
# no plt.show() call is needed.
fig.show()