In [1]:
import numpy as np
import pandas as pd
import sys,io
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from matplotlib import pyplot as plt 
from sklearn.metrics import confusion_matrix
from sklearn.metrics import mean_squared_error
import folium
from folium import plugins
import datetime as dt
from datetime import datetime as dtf
from datetime import timedelta
from calendar import monthrange
import os

The import statements above are Python libraries that our program requires to be available to perform analysis on the data set we are training and analysing in this project. pandas, numpy, pyplot and datetime were given an alias to ease the use in the program
- numpy is a popular library that is essential for numerical computing in Python and is mainly used for array manipulation, mathematical operations and linear algebra
- pandas is a popular library that is essential in data manipulation and analysis
- sys is module that provides access to system-specific parameters and functions.
- io is a module that provides access to use the methods/types in the io module
- train_test_split is used to split a dataset into training and testing sets for machine learning
- LinearRegression is used to implement linear regression algorithm for regression tasks
- matplotlib.pyplot is used for the creation of visualization
- confusion_matrix is used to generate confusion matrix which shows data that falls under True Positive, False Positive, True Negative and False Negative
- mean_squared_error is used to calculate the mean_squared error of the regression model. The mean squared error represents the average squared residual. As the data points fall closer to the regression line, the model has less error, decreasing the MSE.
- folium is used for creating interactive maps
- plugins is a module imported from folium library that provides additional functionality for creating interactive maps, such as markers, heatmaps, and clusters
- datetime is a module that provides classes for working with dates and times in Python
- timedelta is a class used for manipulating date and time by defining specific duration
- monthrange is a function that returns the number of days in a month and the day of the week of the first day of the month
- os provides a way of interacting with the operating system in Python, such as navigating directories, creating files, and running system commands


- CountVectorizer is used to convert text data into numerical format for machine learning
- MultinomialNB is used to implement the Multinomial Naive Bayes algorithm for text classification
- accuracy_score is used to calculate accuracy of a classification model
- classification_report is used to generate classification report 
- precision_score is used to calculate the precision of a classification model. Precision is calculated as the number of true positives divided by the total number of true positives and false positives. Result is between 0.0 for no precision and 1.0 for full or perfect precision.
- LogisticRegression is used to implement the logistic regression algorithm for binary classification tasks.
- seaborn is used for data visualization; it provides a high-level interface for drawing attractive and informative statistical graphics

In [2]:
# Directory that receives the files generated by this notebook (the rendered map).
output_path = r'D:\Important2\gitworks\CabSurgeArea\data\Output'
# Join with os.path.join instead of concatenating a literal backslash, so the
# separator always matches the platform the notebook is executed on.
myMapHTMLFilePath = os.path.join(str(output_path), "mymap.html")
# Outline colour of the map marker for each demand level (0 = low ... 3 = very high).
colorCombo = {0:"#87CEEB",1:"#F0E68C",2:"#D2691E",3:"#90EE90"}
# Fill colour of the map marker for each demand level (a lighter tint of the outline colour).
fcolorCombo = {0:"#CFEBF7",1:"#F9F5D1",2:"#F1C2A0",3:"#D3F8D3"}

- 'output_path' is a variable that will store the path to a directory where the output of program will be stored. It is preceded with 'r' which stands for "raw" and will cause backslashes in the string to be interpreted as actual backslashes rather than special characters.
- myMapHTMLFilePath is a variable that  will store the path to a directory where the HTML file that will be the output of program will be stored. There is a concatenation happening for the path specified in the 'output_path' variable and '\mymap.html'
- colorCombo (outline colour) is a variable that stores a dictionary mapping the integer demand levels to colors given as hex codes. The color names corresponding to the hex codes can be found at https://www.webucator.com/article/python-color-constants-module/
- fcolorCombo (fill colour) is a variable that stores a dictionary mapping the integer demand levels to colors given as hex codes. The color names corresponding to the hex codes can be found at https://www.webucator.com/article/python-color-constants-module/

In [3]:
class Color:
    """ANSI escape sequences used to colour the console output (cosmetic only)."""

    # Foreground colours.
    BLACK = "\x1b[30m"
    RED = "\x1b[31m"
    GREEN = "\x1b[32m"
    YELLOW = "\x1b[33m"
    BLUE = "\x1b[34m"
    MAGENTA = "\x1b[35m"
    CYAN = "\x1b[36m"
    WHITE = "\x1b[37m"
    # Switches colouring off again.
    RESET = "\x1b[0m"


# Shared instance used by the print statements throughout the notebook.
c = Color()

The program above defines a class Color which is used to present the output report with colors. For cosmetic use only.

In [4]:
def launchProgram():
    """Print the cyan project banner: project name, course and group."""
    banner_rows = (
        "+++++++++++++++++++++++++++++++++++++++++++++++++++++",
        "+                CAB SURGE AREA                     +",
        "+       BDM 1034 Midterm Project - Group 3          +",
    )
    for row in banner_rows:
        print(f"{c.CYAN}{row}")
    # The closing border also resets the colour and is followed by a blank line.
    print(f"{c.CYAN}+++++++++++++++++++++++++++++++++++++++++++++++++++++{c.RESET}\n")


launchProgram()

[36m+++++++++++++++++++++++++++++++++++++++++++++++++++++
[36m+                CAB SURGE AREA                     +
[36m+       BDM 1034 Midterm Project - Group 3          +
[36m+++++++++++++++++++++++++++++++++++++++++++++++++++++[0m



The program above displays the Project Name, Subject and Group Name

In [5]:

# Walk the data directory tree and print the full path of every file found in it.
for folder, _, files in os.walk(r'C:\Users\dambz\OneDrive\Documents\GitHub\group3_bdm1034\data'):
    for name in files:
        print(os.path.join(folder, name))

C:\Users\dambz\OneDrive\Documents\GitHub\group3_bdm1034\data\Input\rideshare_kaggle.csv
C:\Users\dambz\OneDrive\Documents\GitHub\group3_bdm1034\data\Input\rideshare_kaggle.csv.zip
C:\Users\dambz\OneDrive\Documents\GitHub\group3_bdm1034\data\Output\mymap.html


The program above will perform a loop to check all the files under data directory and prints the list of all the file paths in the directory specified above.

In [6]:
# Absolute path of the ride-share CSV on the author's machine; adjust it to the local checkout.
# NOTE(review): the directory-listing cell walks ...\GitHub\group3_bdm1034\data while this path
# points at ...\GitHub\CabSurgeArea\data - confirm that both locations hold the same file.
#data2 = r'D:\Important2\gitworks\CabSurgeArea\data\Input\rideshare_kaggle.csv'
data2 = r'C:\Users\dambz\OneDrive\Documents\GitHub\CabSurgeArea\data\Input\rideshare_kaggle.csv'

# Read the CSV into the DataFrame 'dfdata2'; header=0 takes the column names from the first line.
dfdata2 = pd.read_csv(data2,header=0)

The program above reads a csv file named rideshare_kaggle.csv located in the directory specified in 'data2' and stores the data into a pandas DataFrame named 'dfdata2'. This dataframe will then be used in this program for data analysis.

In [7]:
# Summarise the loaded data: per-column non-null counts, then the (rows, columns) shape.
for heading, summary in (
    ("Count of Dataset", dfdata2.count()),
    ("Data Frame Shape", dfdata2.shape),
):
    print(f"{c.GREEN} {heading}: {c.RESET}\n {summary}\n")

# Heading for the table rendered by the cell's last expression (first 10 rows).
print(f"{c.GREEN} Preview of Dataset: {c.RESET}")
dfdata2.head(10)


[32m Count of Dataset: [0m
 id                             693071
timestamp                      693071
hour                           693071
day                            693071
month                          693071
datetime                       693071
timezone                       693071
source                         693071
destination                    693071
cab_type                       693071
product_id                     693071
name                           693071
price                          637976
distance                       693071
surge_multiplier               693071
latitude                       693071
longitude                      693071
temperature                    693071
apparentTemperature            693071
short_summary                  693071
long_summary                   693071
precipIntensity                693071
precipProbability              693071
humidity                       693071
windSpeed                      693071
windGust            

Unnamed: 0,id,timestamp,hour,day,month,datetime,timezone,source,destination,cab_type,...,precipIntensityMax,uvIndexTime,temperatureMin,temperatureMinTime,temperatureMax,temperatureMaxTime,apparentTemperatureMin,apparentTemperatureMinTime,apparentTemperatureMax,apparentTemperatureMaxTime
0,424553bb-7174-41ea-aeb4-fe06d4f4b9d7,1544953000.0,9,16,12,2018-12-16 09:30:07,America/New_York,Haymarket Square,North Station,Lyft,...,0.1276,1544979600,39.89,1545012000,43.68,1544968800,33.73,1545012000,38.07,1544958000
1,4bd23055-6827-41c6-b23b-3c491f24e74d,1543284000.0,2,27,11,2018-11-27 02:00:23,America/New_York,Haymarket Square,North Station,Lyft,...,0.13,1543251600,40.49,1543233600,47.3,1543251600,36.2,1543291200,43.92,1543251600
2,981a3613-77af-4620-a42a-0c0866077d1e,1543367000.0,1,28,11,2018-11-28 01:00:22,America/New_York,Haymarket Square,North Station,Lyft,...,0.1064,1543338000,35.36,1543377600,47.55,1543320000,31.04,1543377600,44.12,1543320000
3,c2d88af2-d278-4bfd-a8d0-29ca77cc5512,1543554000.0,4,30,11,2018-11-30 04:53:02,America/New_York,Haymarket Square,North Station,Lyft,...,0.0,1543507200,34.67,1543550400,45.03,1543510800,30.3,1543550400,38.53,1543510800
4,e0126e1f-8ca9-4f2e-82b3-50505a09db9a,1543463000.0,3,29,11,2018-11-29 03:49:20,America/New_York,Haymarket Square,North Station,Lyft,...,0.0001,1543420800,33.1,1543402800,42.18,1543420800,29.11,1543392000,35.75,1543420800
5,f6f6d7e4-3e18-4922-a5f5-181cdd3fa6f2,1545071000.0,18,17,12,2018-12-17 18:25:12,America/New_York,Haymarket Square,North Station,Lyft,...,0.0221,1545066000,34.19,1545048000,40.66,1545022800,27.39,1545044400,34.97,1545080400
6,462816a3-820d-408b-8549-0b39e82f65ac,1543209000.0,5,26,11,2018-11-26 05:03:00,America/New_York,Back Bay,Northeastern University,Lyft,...,0.1245,1543251600,40.67,1543233600,46.46,1543255200,37.45,1543291200,43.81,1543251600
7,474d6376-bc59-4ec9-bf57-4e6d6faeb165,1543780000.0,19,2,12,2018-12-02 19:53:04,America/New_York,Back Bay,Northeastern University,Lyft,...,0.0916,1543770000,36.32,1543726800,50.8,1543788000,35.84,1543748400,50.13,1543788000
8,4f9fee41-fde3-4767-bbf1-a00e108701fb,1543818000.0,6,3,12,2018-12-03 06:28:02,America/New_York,Back Bay,Northeastern University,Lyft,...,0.0004,1543852800,43.09,1543896000,57.02,1543852800,39.9,1543896000,56.35,1543852800
9,8612d909-98b8-4454-a093-30bd48de0cb3,1543316000.0,10,27,11,2018-11-27 10:45:22,America/New_York,Back Bay,Northeastern University,Lyft,...,0.1425,1543338000,36.34,1543377600,46.91,1543320000,32.43,1543377600,44.01,1543320000


The program above displays statistics about the 'dfdata2' DataFrame.
- dfdata2.count() => displays the count of data for each field/column
- dfdata2.shape => displays the dimensions of the data, which are 693,071 rows and 57 columns (shape is an attribute, so it is read without parentheses)
- dfdata2.head(10) => displays the first 10 rows with complete columns of the 'dfdata2' DataFrame that provides a glimpse of the data in tabular form

In [8]:
# Heading for the structural summary printed below.
print(f"{c.GREEN} Info of Dataset: {c.RESET}") 
# info() prints its report itself: index range, column names, non-null counts and dtypes.
dfdata2.info() # Displays the information on the data set includes the index number, column name, non-null columns and data type

[32m Info of Dataset: [0m
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 693071 entries, 0 to 693070
Data columns (total 57 columns):
 #   Column                       Non-Null Count   Dtype  
---  ------                       --------------   -----  
 0   id                           693071 non-null  object 
 1   timestamp                    693071 non-null  float64
 2   hour                         693071 non-null  int64  
 3   day                          693071 non-null  int64  
 4   month                        693071 non-null  int64  
 5   datetime                     693071 non-null  object 
 6   timezone                     693071 non-null  object 
 7   source                       693071 non-null  object 
 8   destination                  693071 non-null  object 
 9   cab_type                     693071 non-null  object 
 10  product_id                   693071 non-null  object 
 11  name                         693071 non-null  object 
 12  price                        6

The program above displays information about the 'dfdata2' DataFrame.
- dfdata2.info() => displays the summarized information of the 'dfdata2' DataFrame. The information includes the index number, column names, non-null counts and data type of each column. This information can be used to further understand the DataFrame structure, which is an essential input to data cleaning, wrangling and data analysis in general.

In [9]:
# Parse the text timestamps into pandas datetimes so that the .dt accessor can be used on them.
dfdata2["datetime"] =  pd.to_datetime(dfdata2["datetime"]) # Takes the value of 'datetime' column from 'dfdata2' DataFrame and converts it to pandas datetime format
# Day of the week as an integer (0 = Monday ... 6 = Sunday); used later as a model feature.
dfdata2["theDay"] = dfdata2["datetime"].dt.day_of_week # Creates a new column called 'theDay' into 'dfdata2' DataFrame and contains the day of the week
# Preview: head() without an argument shows the first five rows.
dfdata2["theDay"].head() # Displays the first five rows of the 'theDay' column

0    6
1    1
2    2
3    4
4    3
Name: theDay, dtype: int64

The program above creates a new column called 'theDay' into 'dfdata2' DataFrame.
- pd.to_datetime(dfdata2["datetime"]) => Takes the value of 'datetime' column from 'dfdata2' DataFrame and converts it to pandas datetime format which will then be used for data analysis related to data and time
- dfdata2["datetime"].dt.day_of_week => Creates a new column called 'theDay' in the 'dfdata2' DataFrame that contains the day of the week. '.dt' is used to access the datetime attributes of the 'datetime' column, while '.day_of_week' returns the day of the week for each record in the 'datetime' column.
0 = Monday
1 = Tuesday
2 = Wednesday
3 = Thursday
4 = Friday
5 = Saturday
6 = Sunday
- dfdata2["theDay"].head() => Displays the first five rows of the 'theDay' column as default since we didn't specify a number in the parenthesis

In [10]:
# Distinct pickup locations, in order of first appearance in the data.
mysources = dfdata2['source'].unique() # Extracts the distinct or unique values from the 'source' column from 'dfdata2' DataFrame and assigns the value to 'mysources'
# Integer codes 0 .. len(mysources)-1; code i stands for mysources[i].
imysources = range(len(mysources)) # Range object with the same length as 'mysources'
# For deletion print(mysources)
print(f"{c.GREEN} List of Unique Source: {c.RESET} {mysources}")

[32m List of Unique Source: [0m ['Haymarket Square' 'Back Bay' 'North End' 'North Station' 'Beacon Hill'
 'Boston University' 'Fenway' 'South Station' 'Theatre District'
 'West End' 'Financial District' 'Northeastern University']


The program above extracts the list of distinct 'source' values from the 'dfdata2' DataFrame into 'mysources', creates a matching range of integer codes in 'imysources', and prints the distinct 'source' list
- mysources = dfdata2['source'].unique() => Extracts the distinct or unique values from the 'source' column from 'dfdata2' DataFrame and assigns the value to 'mysources'
- imysources = range(len(mysources)) => Creates a new variable called 'imysources' which is a range object with the same length as 'mysources'. This part is essential in creating some mapping between the unique values and the corresponding integer code later

In [11]:
# Start 'SourceCode' as a copy of the source names; the names are turned into integer codes in the next cell.
dfdata2["SourceCode"] = dfdata2["source"]
print(f"{c.GREEN} Unique Source and Record Count: {c.RESET}")
# Number of rides recorded per pickup location, most frequent first.
print(dfdata2["SourceCode"].value_counts())

[32m Unique Source and Record Count: [0m
Financial District         58857
Theatre District           57813
Back Bay                   57792
Boston University          57764
North End                  57763
Fenway                     57757
Northeastern University    57756
South Station              57750
Haymarket Square           57736
West End                   57562
Beacon Hill                57403
North Station              57118
Name: SourceCode, dtype: int64


The program above creates a new column called 'SourceCode' into 'dfdata2' DataFrame by replicating the values from 'source' column and prints the count afterwards
- dfdata2["SourceCode"] = dfdata2["source"] => Creates a new column called 'SourceCode' into 'dfdata2' DataFrame by replicating the values from 'source' column
- print(dfdata2["SourceCode"].value_counts()) => Prints the unique sources and their record counts using the value_counts() method on the 'SourceCode' DataFrame column

In [12]:
# Encode every pickup location as its position in `mysources` (0 .. len(mysources)-1).
# The explicit name -> code mapping replaces `dfdata2["SourceCode"].replace(..., inplace=True)`:
# that chained in-place call works on a temporary Series under pandas copy-on-write, so the
# DataFrame column would silently keep the names. Mapping from the untouched 'source' column
# also gives the same result no matter how often the cell is re-run.
source_to_code = dict(zip(mysources, imysources))
dfdata2["SourceCode"] = dfdata2["source"].map(source_to_code)
print(f"{c.GREEN} Converted Numerical Value of Source and Record Count: {c.RESET}")
print(dfdata2["SourceCode"].value_counts())

[32m Converted Numerical Value of Source and Record Count: [0m
10    58857
8     57813
1     57792
5     57764
2     57763
6     57757
11    57756
7     57750
0     57736
9     57562
4     57403
3     57118
Name: SourceCode, dtype: int64


The program above converts the categorical data values in the 'SourceCode' column into numerical values using a mapping
- dfdata2["SourceCode"].replace(mysources,imysources,inplace=True) => Replaces each source name in the "SourceCode" column with its numerical code and stores the result in the same column (inplace=True). 'mysources' is the array of unique source names (e.g. Financial District, Theatre District) and 'imysources' is the matching range of integers, so the name at position i in 'mysources' is replaced by the code i
- print(dfdata2["SourceCode"].value_counts()) => Prints the unique sources and their record counts using the value_counts() method on the 'SourceCode' DataFrame column, but this time using the converted numerical values

In [26]:
# Number of rides for every (day of week, hour, source code) combination; value_counts() sorts
# the combinations by frequency and reset_index() turns the result into a flat table whose
# last column 'Count' is the regression target.
dfdata1 = dfdata2[['theDay','hour','SourceCode']].value_counts().reset_index(name='Count')
# Preview of the five most frequent combinations.
dfdata1.head()

Unnamed: 0,theDay,hour,SourceCode,Count
0,1,0,5,742
1,1,0,9,733
2,1,3,2,691
3,1,1,2,690
4,1,5,8,678


The program above creates a new DataFrame named 'dfdata1' from the 3 columns 'theDay', 'hour' and 'SourceCode' of 'dfdata2'. It applies value_counts() to these 3 columns to count the occurrences of each combination of 'theDay', 'hour' and 'SourceCode'. The count is then stored in the new column 'Count'. Finally, dfdata1.head() displays the first 5 rows (the default) of the new DataFrame with all 4 columns described above.

In [14]:
def calc_demand(frame, max_count=None):
    """Classify a ride count into one of four demand levels.

    The range from 0 to ``max_count`` is divided into four equal bands whose upper
    edges are rounded to whole numbers; the level is the index of the band that
    contains ``frame['Count']``.

    Parameters
    ----------
    frame : mapping or pandas.Series
        Row-like object providing the ride count under the key ``'Count'``.
    max_count : number, optional
        Reference maximum that defines the bands. When omitted, the largest
        ``'Count'`` of the notebook-level DataFrame ``dfdata1`` is used, which is
        the original behaviour.

    Returns
    -------
    int
        0 for the lowest band up to 3 for everything above the third band edge.
    """
    if max_count is None:
        max_count = dfdata1['Count'].max()
    quarter = max_count / 4
    count = frame['Count']
    # The band edges are tested in ascending order, so each test only needs the
    # upper edge: the lower edge is implied by the previous test having failed.
    for level, bands in enumerate((1, 2, 3)):
        if count <= round(quarter * bands, 0):
            return level
    return 3

In [15]:
# Features: every column except the last one ('theDay', 'hour', 'SourceCode').
x = dfdata1.iloc[:,:-1]
# Target: the last column ('Count'), kept as a one-column DataFrame by the -1: slice.
y = dfdata1.iloc[:,-1:]
print(x.head(),y.head())

   theDay  hour  SourceCode
0       1     0           5
1       1     0           9
2       1     3           2
3       1     1           2
4       1     5           8    Count
0    742
1    733
2    691
3    690
4    678


In [16]:
# Split features and target into training (90%) and test (10%) sets.
# random_state=42 fixes the shuffle so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=42)

The program above distributes the data into training and testing sets using the train_test_split function. The input features are stored in DataFrame 'x' while the output target variable is stored in DataFrame 'y'.
- test_size set to 0.1 which means 10% of the data will be used for testing, while 90% will be used for training
- random_state is set to 42. This is used  by the number generator to reproduce the result.
The result of this program is to return 4 arrays X_train (Input features for the training set), X_test (Input features for the testing set), y_train (Output target variable for the Training Set) and y_test (Output target variable for testing set).

In [17]:
# Fit a linear regression on the training split, then report R^2 on the training
# data together with the fitted coefficients and intercept.
lr = LinearRegression()
lr.fit(X_train, y_train)

fit_report = (
    ("Coefficient of determination", lr.score(X_train, y_train)),
    ("Slope", lr.coef_),
    ("Intercept", lr.intercept_),
)
for label, value in fit_report:
    print(f"{c.GREEN} {label}: {c.RESET} {value}")

[32m Coefficient of determination: [0m 0.21545793168748817
[32m Slope: [0m [[-21.73742033  -0.95624998   0.26086498]]
[32m Intercept: [0m [430.62846627]


In [18]:
# Predict the ride counts of the held-out rows and round them to whole numbers.
y_pred = np.asarray(np.around(lr.predict(X_test)), dtype='int')

# Wrap the predictions in a DataFrame so the demand level can be attached per row later.
final = pd.DataFrame(y_pred, columns=['Count'])

# How often each predicted count occurs.
print(final.value_counts())


Count
301      4
430      4
371      4
377      4
355      3
        ..
310      1
386      1
341      1
388      1
433      1
Length: 110, dtype: int64


In [19]:
# Attach the demand level (0-3) of every predicted count; axis=1 passes one row at a time to calc_demand.
final["demand_lvl"] = final.apply(calc_demand,axis=1)
print(final)
# Number of test rows per predicted demand level.
print(final['demand_lvl'].value_counts())
#print(dfdata1.info())

     Count  demand_lvl
0      343           1
1      284           1
2      314           1
3      335           1
4      400           2
..     ...         ...
191    303           1
192    324           1
193    320           1
194    433           2
195    371           1

[196 rows x 2 columns]
1    125
2     71
Name: demand_lvl, dtype: int64


# Creating the connection with MongoDB.

In [20]:
from pymongo import MongoClient

# Take the connection string from the CABSURGE_MONGODB_URI environment variable so that
# credentials never have to be typed into (and saved with) the notebook. The original
# placeholder stays as the fallback, so behaviour is unchanged when the variable is unset.
conn_str = os.environ.get("CABSURGE_MONGODB_URI", "example string")

client = MongoClient(conn_str)
# Database that holds the MapLocations and PredictionData collections used below.
db = client.CabSurge


# Function to Create the Reference Data.

In [21]:
def createRefData():
    """Insert the reference record of every pickup area into the MapLocations collection.

    Each record carries the numeric ID of the area, its name, its latitude/longitude
    and the radius used for its map marker.
    """
    # One row per area: (name, latitude, longitude, radius). The ID is the row position.
    areas = (
        ("Haymarket Square", 42.3610, -71.0579, 35),
        ("Back Bay", 42.3506, -71.0798, 80),
        ("North End", 42.3651, -71.0542, 35),
        ("North Station", 42.3648, -71.0603, 30),
        ("Beacon Hill", 42.3587, -71.0678, 40),
        ("Boston University", 42.3492, -71.1043, 30),
        ("Fenway", 42.3452, -71.1046, 30),
        ("South Station", 42.3497, -71.0550, 50),
        ("Theatre District", 42.3506, -71.0641, 50),
        ("West End", 42.3634, -71.0658, 35),
        ("Financial District", 42.3558, -71.0556, 45),
        ("Northeastern University", 42.3388, -71.0881, 80),
    )

    documents = [
        {
            "ID": area_id,
            "Name": area_name,
            "Location": {"lat": latitude, "lon": longitude},
            "Radius": radius,
        }
        for area_id, (area_name, latitude, longitude, radius) in enumerate(areas)
    ]

    db.MapLocations.insert_many(documents)


## Function call to create the reference data.

In [22]:
#createRefData()

In [23]:
def getPredictions(dtime):
    """Predict the hourly demand of every pickup area for the 24 hours starting at ``dtime``
    and store the result as one document in the PredictionData collection.

    Parameters
    ----------
    dtime : datetime.datetime
        Start of the forecast window; one prediction is made for ``dtime`` plus 0 to 23 hours.

    Returns
    -------
    None
        The stored document has the keys 'Runtime' (time of this call) and '24HourData'
        (list of hourly records, grouped by source with 24 consecutive hours each).
    """
    # One row per (source, hour offset). enumerate() yields the integer code of each source
    # directly, which avoids a list search per row and the shadowing of the builtin `iter`.
    rows = [
        [dtime + timedelta(hours=offset), source, code]
        for code, source in enumerate(mysources)
        for offset in range(24)
    ]
    tempDF = pd.DataFrame(rows, columns=['Date', 'Source', 'SourceCode'])
    tempDF['Day'] = tempDF['Date'].dt.strftime('%A')
    tempDF['theDay'] = tempDF['Date'].dt.day_of_week
    tempDF['hour'] = tempDF['Date'].dt.hour

    # Same feature columns, in the same order, as the ones the model was trained on.
    predCounts = lr.predict(tempDF[['theDay', 'hour', 'SourceCode']])
    predCounts = np.asarray(np.around(predCounts), dtype='int')
    predCounts = pd.DataFrame(predCounts, columns=['Count'])
    tempDF['Count'] = predCounts['Count']
    tempDF['demand_lvl'] = predCounts.apply(calc_demand, axis=1)

    # Columns are read by name (positional `row[0]` on a labelled Series is deprecated in
    # pandas) and converted to plain Python types so the document can be encoded for MongoDB.
    hourly_records = [
        {
            'DateTime': row.Date.to_pydatetime(),
            'Source': row.Source,
            'SourceCode': int(row.SourceCode),
            'Day': row.Day,
            'DayNo': int(row.theDay),
            'Hour': int(row.hour),
            'DemandCount': int(row.Count),
            'DemandLevel': int(row.demand_lvl),
        }
        for row in tempDF.itertuples(index=False)
    ]

    finalDict = {
        'Runtime': dtf.now().strftime('%d %b %Y %H:%M:%S %A'),
        '24HourData': hourly_records,
    }
    db.PredictionData.insert_one(finalDict)

getPredictions(dtf.now())

ServerSelectionTimeoutError: example string:27017: [Errno 11001] getaddrinfo failed, Timeout: 30s, Topology Description: <TopologyDescription id: 640bf56e298bd6536207e12d, topology_type: Unknown, servers: [<ServerDescription ('example string', 27017) server_type: Unknown, rtt: None, error=AutoReconnect('example string:27017: [Errno 11001] getaddrinfo failed')>]>

In [None]:
def generateMap():
    """Draw one circle per pickup area, coloured by the demand level of the first hour
    of the most recent prediction, save the map as HTML and return it.

    Returns
    -------
    folium.Map
        The rendered map (also written to ``myMapHTMLFilePath``).

    Raises
    ------
    LookupError
        If the PredictionData collection contains no prediction document.
    """
    # Map centred on the Boston pickup areas.
    m = folium.Map(location=[42.3505, -71.0760],zoom_start=13.5, control_scale=True)

    mloc = db.MapLocations
    pData = db.PredictionData

    # Fetch the newest prediction once. Sorting by _id descending returns the document that
    # was inserted last; the previous ascending sort always returned the oldest one, and the
    # query was repeated for every area.
    latest = pData.find_one(sort=[("_id", -1)])
    if latest is None:
        raise LookupError("No prediction data found; run getPredictions() first.")

    # Demand level of the first forecast hour per area. Looking the entry up by source name
    # replaces the index counter that was reset to 0 on every iteration, which made every
    # area show the level of the first area.
    first_hour_level = {}
    for entry in latest["24HourData"]:
        first_hour_level.setdefault(entry["Source"], entry["DemandLevel"])

    for source in mysources:
        rec = mloc.find_one({'Name':source})
        # Areas without reference data or without a prediction cannot be drawn; skip them
        # instead of failing on a missing record.
        if rec is None or source not in first_hour_level:
            continue
        loc = rec.get('Location')
        level = first_hour_level[source]
        folium.CircleMarker(
            location=[loc['lat'], loc['lon']],
            radius=rec.get('Radius'),
            popup=rec.get('Name'),
            color=colorCombo[level],
            fill=True,
            fill_color=fcolorCombo[level],
        ).add_to(m)

    # Persist the map as a standalone HTML file and hand it back for embedding.
    m.save(myMapHTMLFilePath)
    return m


In [None]:
#generateMap()

In [None]:
# Scratch check of the current calendar year (the GUI below derives its year list the same way).
import datetime
curYear = datetime.date.today().year
# Last expression of the cell: displays the year.
curYear

2023

In [None]:
from PyQt5.QtWidgets import *
from PyQt5.QtCore import *
from PyQt5.QtGui import *
from PyQt5.QtWebEngineWidgets import QWebEngineView # pip install PyQtWebEngine
from PyQt5 import QtGui,QtCore
import datetime as dt
from datetime import datetime as dtf
from calendar import monthrange


class MyApp(QMainWindow):
    """Main window of the demand viewer.

    The user chooses between the current date/time and a custom date/time; the demand is
    then predicted with getPredictions() and the map produced by generateMap() is shown
    in an embedded web view.
    """

    # Reference to the live window; restart() replaces it with a fresh instance.
    singleton: 'MyApp' = None

    # Text shown in DateLabel until one of the two modes is chosen.
    _PROMPT = "Select one option from above!"
    # Format used to display the date/time the map was generated for.
    _DATE_FORMAT = "%d %b %Y %H:%M:%S %A"

    def __init__(self):
        super().__init__()
        self.title = 'Folium in PyQt Example'
        self.main_window()

    def main_window(self):
        """Create all widgets and layouts, wire the signals and show the window."""
        self.setWindowTitle(self.title)
        self.setGeometry(0, 0, 1000, 700)
        # Make Grid
        self.grid = QGridLayout()
        self.grid.setSpacing(100)
        #Make Radio Buttons
        self.curButton = QRadioButton("Current Date/Time")
        self.curButton.toggled.connect(self.onClicked)

        self.cusButton = QRadioButton("Custom Date/Time")
        self.cusButton.toggled.connect(self.onClicked)

        self.grid.addWidget(self.curButton,0,0,alignment=Qt.AlignLeft)
        self.grid.addWidget(self.cusButton,0,1,alignment=Qt.AlignLeft)

        # The grid is nested into the central widget's vertical layout further down.
        # It is not installed with setLayout(): a QMainWindow already owns a layout,
        # so that call was ignored and only produced a Qt warning.

        curYear = int(dt.date.today().year)
        toYear = curYear+51
        self.cbYear = QComboBox(objectName='Year', currentIndex=0, enabled=True)
        self.cbYear.addItem('Year')
        self.cbYear.addItems([format(x, '02d') for x in range(curYear,toYear)])
        self.cbYear.currentIndexChanged.connect(self.current_text_changed)
        self.cbMonth = QComboBox(objectName='Month', currentIndex=0, enabled=False)
        self.cbMonth.addItem('Month')
        self.cbMonth.addItems([format(x, '02d') for x in range(1,13)])
        self.cbMonth.currentIndexChanged.connect(self.current_text_changed)
        self.cbDay = QComboBox(objectName='Day', currentIndex=0, enabled=False)
        self.cbDay.addItem('Day')
        self.cbDay.currentIndexChanged.connect(self.current_text_changed)
        self.cbHour = QComboBox(objectName='Hour', currentIndex=0, enabled=False)
        self.cbHour.addItem('Hour')
        self.cbHour.currentIndexChanged.connect(self.current_text_changed)
        # Hours 00-23: the value is parsed with %H, which rejects "24", and the previous
        # 01-24 list offered no way to select midnight.
        self.cbHour.addItems([format(x, '02d') for x in range(0,24)])
        self.goButton = QPushButton("Go!",enabled=False)
        self.goButton.clicked.connect(self.showCustomLabel)

        self.DateLabel = QLabel(self._PROMPT)
        self.labelLow = QLabel('Low Demand')
        self.labelLow.setAlignment(Qt.AlignCenter)
        self.labelLow.setStyleSheet("background-color:#87CEEB;text-align: center;")
        self.labelAverage = QLabel('Average Demand')
        self.labelAverage.setAlignment(Qt.AlignCenter)
        self.labelAverage.setStyleSheet("background-color:#F0E68C;text-align: center;")
        self.labelHigh = QLabel('High Demand')
        self.labelHigh.setAlignment(Qt.AlignCenter)
        self.labelHigh.setStyleSheet("background-color:#D2691E;text-align: center;")
        self.labelVHigh = QLabel('Very High Demand')
        self.labelVHigh.setAlignment(Qt.AlignCenter)
        self.labelVHigh.setStyleSheet("background-color:#90EE90;text-align: center;")

        #########################################################
        # Create Ok and Reset bottom buttons#
        ########################################################

        okButton = QPushButton("OK")
        okButton.clicked.connect(lambda:self.close())
        resetButton = QPushButton("Reset")
        # Reset rebuilds the whole window instead of calling self.reset().
        resetButton.clicked.connect(MyApp.restart)

        # Horizontal row with the custom date/time selectors (hidden until "Custom" is chosen).
        self.hbox3 = QHBoxLayout()
        self.hbox3.addWidget(self.cbYear)
        self.hbox3.addWidget(self.cbMonth)
        self.hbox3.addWidget(self.cbDay)
        self.hbox3.addWidget(self.cbHour)
        self.hbox3.addWidget(self.goButton)
        self._hide_custom_controls()

        hbox2 = QHBoxLayout()
        hbox2.addWidget(okButton)
        hbox2.addWidget(resetButton)

        # Colour legend for the four demand levels.
        hbox1 = QHBoxLayout()
        hbox1.addWidget(self.labelLow)
        hbox1.addWidget(self.labelAverage)
        hbox1.addWidget(self.labelHigh)
        hbox1.addWidget(self.labelVHigh)

        self.webView = QWebEngineView()
        self.reset()

        vbox = QVBoxLayout()
        vbox.addLayout(self.grid)
        vbox.addWidget(self.DateLabel,alignment=Qt.AlignLeft)
        vbox.addLayout(self.hbox3)
        vbox.addWidget(self.webView,alignment=Qt.AlignTop)
        vbox.addLayout(hbox1)
        vbox.addLayout(hbox2)

        master_widget = QWidget()
        master_widget.setLayout(vbox)

        self.setCentralWidget(master_widget)
        self.show()

    @staticmethod
    def restart():
        """Replace the current window with a freshly built one."""
        MyApp.singleton = MyApp()

    def _render_map(self, folium_map):
        """Serialise a folium map to HTML in memory and load it into the web view."""
        data = io.BytesIO()
        folium_map.save(data, close_file=False)
        self.webView.setHtml(data.getvalue().decode())

    def _hide_custom_controls(self):
        """Hide the year/month/day/hour selectors and the Go button."""
        for widget in (self.cbYear, self.cbMonth, self.cbDay, self.cbHour, self.goButton):
            widget.hide()

    def _populate_days(self):
        """Rebuild the day list for the selected year and month.

        The list is cleared first so that switching months does not append duplicate
        days, and it is only filled (and enabled) when both a year and a month are
        actually selected rather than the 'Year'/'Month' placeholders.
        """
        year_text = self.cbYear.currentText()
        month_text = self.cbMonth.currentText()
        selectable = year_text.isdigit() and month_text.isdigit()
        previous_day = self.cbDay.currentText()

        # Signals are blocked so that refilling the list is not mistaken for a user choice.
        self.cbDay.blockSignals(True)
        self.cbDay.clear()
        self.cbDay.addItem('Day')
        if selectable:
            last_day = monthrange(int(year_text), int(month_text))[1]
            self.cbDay.addItems([format(x, '02d') for x in range(1, last_day + 1)])
            # Keep the previously chosen day when it still exists in the new month.
            kept = self.cbDay.findText(previous_day)
            if kept > 0:
                self.cbDay.setCurrentIndex(kept)
        self.cbDay.blockSignals(False)
        self.cbDay.setEnabled(selectable)

    def reset(self):
        """Show the empty base map and return the controls to their initial state."""
        self._render_map(folium.Map(location=[42.3505, -71.0760],zoom_start=13.5, control_scale=True))
        self.curButton.setChecked(False)
        self.cusButton.setChecked(False)
        self.DateLabel.setText(self._PROMPT)
        self._hide_custom_controls()

    def showCustomLabel(self):
        """Predict and display the demand for the date/time chosen in the selectors."""
        parts = [combo.currentText() for combo in (self.cbYear, self.cbMonth, self.cbDay, self.cbHour)]
        # A selector left on its placeholder cannot be parsed as a date; ask for a
        # complete selection instead of failing inside strptime.
        if not all(part.isdigit() for part in parts):
            self.DateLabel.setText("Please select a year, month, day and hour first.")
            self.DateLabel.show()
            return
        y, mo, d, h = parts
        reqdate = dtf.strptime(f"{y}-{mo}-{d} {h}:00:00", "%Y-%m-%d %H:%M:%S")
        getPredictions(reqdate)
        self._render_map(generateMap())
        self._hide_custom_controls()
        self.DateLabel.setText(reqdate.strftime(self._DATE_FORMAT))
        self.DateLabel.show()

    def current_text_changed(self):
        """Enable the next selector after a choice and keep the day list in sync."""
        sender = self.sender()
        oname = sender.objectName()
        if oname == "Year":
            self.cbMonth.setEnabled(True)
            # The number of days in February depends on the year.
            self._populate_days()
        elif oname == "Month":
            self._populate_days()
        elif oname == "Day":
            self.cbHour.setEnabled(True)
        elif oname == "Hour":
            self.goButton.setEnabled(True)

    def onClicked(self):
        """React to a radio-button toggle: show the current prediction or the custom selectors."""
        self.reset()
        radioButton = self.sender()
        if radioButton.isChecked():
            if radioButton.text().startswith("Current"):
                now = dtf.now()
                self.DateLabel.setText(now.strftime(self._DATE_FORMAT))
                self.DateLabel.show()
                getPredictions(now)
                self._render_map(generateMap())
            elif radioButton.text().startswith("Custom"):
                self.DateLabel.hide()
                self.cbYear.show()
                self.cbMonth.show()
                self.cbDay.show()
                self.cbHour.show()
                self.goButton.show()

if __name__ == "__main__":
    # The QApplication has to exist before any widget is created.
    app = QApplication(sys.argv)
    # Application-wide style sheet: larger font for every widget.
    app.setStyleSheet('''
        QWidget {
            font-size: 25px;
        }
    ''')
    # Builds and shows the main window (kept alive through MyApp.singleton).
    MyApp.restart()  

    # app.exec_() blocks until the window is closed; the SystemExit raised by sys.exit()
    # is caught so that the notebook kernel keeps running afterwards.
    try:
        sys.exit(app.exec_())
    except SystemExit:
        print('Closing Window...')

In [None]:
#generateMap(mysources)