<a href="https://colab.research.google.com/github/Mikdown/Lottery-Picker/blob/master/colab_lotto.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import datetime
import os
import random
import sqlite3
from collections import Counter

import numpy as np
import pandas as pd

pd.set_option('display.max_columns', 20)
pd.set_option('display.width', 1000)
# Raw GitHub URLs of the four CSV inputs: the draw file and the weather file
# for Mega Millions and for Powerball.
mega_draw_file_url = ("https://raw.githubusercontent.com/Mikdown/Lottery-Picker/master/assets/megamillions.csv")
mega_weather_file_url = ("https://raw.githubusercontent.com/Mikdown/Lottery-Picker/master/assets/mega_weather.csv")
pb_draw_file_url = ("https://raw.githubusercontent.com/Mikdown/Lottery-Picker/master/assets/powerball.csv")
pb_weather_file_url = ("https://raw.githubusercontent.com/Mikdown/Lottery-Picker/master/assets/pb_weather.csv")

# sqlite3.connect() creates the database file but not its parent folder, so
# make sure the folder exists before connecting (it is missing whenever the
# notebook runs outside a checkout of the repository).
db_path = 'assets/lottery_data.db'
os.makedirs(os.path.dirname(db_path), exist_ok=True)
conn = sqlite3.connect(db_path)

cur = conn.cursor()

### The following four code cells meet the first and second feature requirements:
1. Feature 1. Read two data files (CSV).
2. Feature 2. Clean your data.
    - Four CSV files are read in from GitHub and cleaned (transformed) with Pandas.
3. Feature 1. Set up a local database and read data in with SQLite.
4. Feature 2. Perform a SQL join.
    - After transformation the files are read into a local SQLite3 DB to be used for further analysis.
    - Data is read in from the database, filtered, grouped and a SQL join is performed on 2 tables.

In [None]:
# Load the Mega Millions draw file and its weather file, derive the date/time
# helper columns, and store both frames in SQLite as 'mm_draw' / 'mm_weather'.
mega_draw_file = mega_draw_file_url
mega_weather_file = mega_weather_file_url

# The CSV's "year", "month" and "day" columns are combined into "draw_date".
mega_draw_df = pd.read_csv(
    mega_draw_file,
    engine='python',
    parse_dates={"draw_date": ["year", "month", "day"]},
)
mega_weather_df = pd.read_csv(mega_weather_file, engine='python')

# Split the "DATE_TIME" column into two separate columns: "Date" (the
# timestamp's calendar day) and "Time" (an HHMM string). The timestamp is
# parsed once and reused for all three columns.
mega_weather_stamp = pd.to_datetime(mega_weather_df['DATE_TIME'])
mega_weather_df['Date'] = pd.to_datetime(mega_weather_stamp.dt.strftime('%Y%m%d'))
mega_weather_df['Time'] = mega_weather_stamp.dt.strftime('%H%M')
mega_weather_df['DATE_TIME'] = mega_weather_stamp

# The draw date is exposed as "Date" so it shares a column name with the
# weather frame.
mega_draw_df = mega_draw_df.rename(columns={'draw_date': 'Date'})
mega_draw_df['Date'] = pd.to_datetime(
    pd.to_datetime(mega_draw_df['Date']).dt.strftime('%Y%m%d')
)

# Calendar features derived from the draw date, plus a constant draw time.
mega_draw_dates = mega_draw_df['Date']
mega_draw_df = mega_draw_df.assign(
    month=mega_draw_dates.dt.month_name(),
    day_name=mega_draw_dates.dt.day_name(),
    day_num=mega_draw_dates.dt.dayofweek,
    day_date=mega_draw_dates.dt.day,
    draw_time='2300',
)

# Drawn numbers are stored as plain integers.
mega_number_cols = ['num_1', 'num_2', 'num_3', 'num_4', 'num_5', 'mb']
mega_draw_df[mega_number_cols] = mega_draw_df[mega_number_cols].astype(int)

mega_draw_df.to_sql('mm_draw', conn, if_exists='replace', index=False)
mega_weather_df.to_sql('mm_weather', conn, if_exists='replace', index=False)

print(mega_draw_df)
print(mega_weather_df)

In [None]:
# Step 1: keep only the weather rows whose Time value lies strictly between
# 2200 and 2359, re-parse the date columns, and replace missing values with 0.
mm_sql_filter = '''SELECT * FROM mm_weather WHERE (Time > 2200 AND Time < 2359)'''
mm_weather_filtered_df = (
    pd.read_sql(mm_sql_filter, conn)
    .assign(
        DATE_TIME=lambda frame: pd.to_datetime(frame['DATE_TIME']),
        Date=lambda frame: pd.to_datetime(frame['Date']),
    )
    .fillna(0)
)
mm_weather_filtered_df.to_sql('mm_weather_filtered', conn, if_exists='replace', index=False)

# Step 2: collapse the filtered rows to one row per Date.
# NOTE(review): the query does not say which row is kept for each date.
mm_sql_grooper = '''SELECT * FROM mm_weather_filtered GROUP BY Date'''
mm_weather_grouped_df = pd.read_sql(mm_sql_grooper, conn)
mm_weather_grouped_df.to_sql('mm_weather_grouped', conn, if_exists='replace', index=False)

# Step 3: join every draw with the weather row that shares its Date.
mm_sql_join = '''SELECT * FROM mm_draw JOIN mm_weather_grouped USING (Date)'''
mm_join_df = pd.read_sql(mm_sql_join, conn)
mm_join_df.to_sql('mm_join', conn, if_exists='replace', index=False)

print(mm_weather_filtered_df)
print(mm_weather_grouped_df)
print(mm_join_df)

In [None]:
# Count how often each number appears in every ball position of the
# Mega Millions draws and store the table in SQLite as 'mm_counts'.
mm_counts_1 = mega_draw_df['num_1'].value_counts()
mm_counts_2 = mega_draw_df['num_2'].value_counts()
mm_counts_3 = mega_draw_df['num_3'].value_counts()
mm_counts_4 = mega_draw_df['num_4'].value_counts()
mm_counts_5 = mega_draw_df['num_5'].value_counts()
mm_counts_mb = mega_draw_df['mb'].value_counts()

# Name the columns explicitly via `keys`: since pandas 2.0 value_counts()
# names its result "count" instead of reusing the source column's name, so
# relying on the Series names would leave no 'mb' column to look up below.
mm_count_cols = ['num_1', 'num_2', 'num_3', 'num_4', 'num_5', 'mb']
mm_counts_df = pd.concat(
    [mm_counts_1, mm_counts_2, mm_counts_3, mm_counts_4, mm_counts_5, mm_counts_mb],
    axis=1,
    keys=mm_count_cols,
)

# Numbers that never came up as the 'mb' ball get a count of 0.
mm_counts_df['mb'] = mm_counts_df['mb'].fillna(0).astype(int)

# 'totals' adds up the five main-number positions only (the 'mb' column is
# excluded); missing counts are skipped by sum().
col_list = list(mm_counts_df)
col_list.remove('mb')
mm_counts_df['totals'] = mm_counts_df[col_list].sum(axis=1)

# sort_index() returns a new frame, so the result has to be assigned back.
mm_counts_df = mm_counts_df.sort_index(ascending=True)

# The index holds the lottery number each row describes; write it to the
# table as a 'number' column, otherwise the counts cannot be attributed.
mm_counts_df.to_sql('mm_counts', conn, if_exists='replace', index=True, index_label='number')

print(mm_counts_df)


In [None]:
# Load the Powerball draw file and its weather file, derive the date/time
# helper columns, and store both frames in SQLite as 'pb_draw' / 'pb_weather'.
pb_draw_file = (pb_draw_file_url)
pb_weather_file = (pb_weather_file_url)

# The CSV's "year", "month" and "day" columns are combined into "draw_date".
pb_draw_df = pd.read_csv(pb_draw_file, engine='python', parse_dates= {"draw_date" : ["year","month","day"]})
pb_weather_df = pd.read_csv(pb_weather_file, engine='python')

# Split "DATE_TIME" into "Date" (the timestamp's calendar day) and "Time"
# (an HHMM string).
pb_weather_df['Date'] = pd.to_datetime(pb_weather_df['DATE_TIME']).dt.strftime('%Y%m%d')
pb_weather_df['Time'] = pd.to_datetime(pb_weather_df['DATE_TIME']).dt.strftime('%H%M')

pb_weather_df['Date'] = pd.to_datetime(pb_weather_df['Date'])
pb_weather_df['DATE_TIME'] = pd.to_datetime(pb_weather_df['DATE_TIME'])

# The draw date is exposed as "Date" so it shares a column name with the
# weather frame; calendar features are derived from it.
pb_draw_df.rename({'draw_date': 'Date'}, axis=1, inplace=True)
pb_draw_df['Date'] = pd.to_datetime(pb_draw_df['Date']).dt.strftime('%Y%m%d')
pb_draw_df['Date'] = pd.to_datetime(pb_draw_df['Date'])
pb_draw_df['month'] = pd.DatetimeIndex(pb_draw_df['Date']).month_name()
pb_draw_df['day_name'] = pd.DatetimeIndex(pb_draw_df['Date']).day_name()
pb_draw_df['day_num'] = pd.DatetimeIndex(pb_draw_df['Date']).dayofweek
pb_draw_df['day_date'] = pd.DatetimeIndex(pb_draw_df['Date']).day
pb_draw_df['draw_time'] = '2300'

# Cast the Powerball frame's OWN number columns to int. This cell previously
# assigned mega_draw_df's columns here, which replaced the Powerball numbers
# with Mega Millions numbers.
# NOTE(review): the bonus-ball column name in powerball.csv is not visible
# here, so only the candidate columns that actually exist are cast - confirm
# the name against the CSV.
pb_number_cols = [
    col
    for col in ('num_1', 'num_2', 'num_3', 'num_4', 'num_5', 'mb', 'pb')
    if col in pb_draw_df.columns
]
if pb_number_cols:
    pb_draw_df[pb_number_cols] = pb_draw_df[pb_number_cols].astype(int)

pb_draw_df.to_sql('pb_draw', conn, if_exists='replace', index = False)
pb_weather_df.to_sql('pb_weather', conn, if_exists='replace', index = False)

print(pb_draw_df)
print(pb_weather_df)


In [None]:
# Step 1: keep only the weather rows whose Time value lies strictly between
# 2200 and 2359, re-parse the date columns, and replace missing values with 0.
pb_sql_filter = '''SELECT * FROM pb_weather WHERE (Time > 2200 AND Time < 2359)'''
pb_weather_filtered_df = (
    pd.read_sql(pb_sql_filter, conn)
    .assign(
        DATE_TIME=lambda frame: pd.to_datetime(frame['DATE_TIME']),
        Date=lambda frame: pd.to_datetime(frame['Date']),
    )
    .fillna(0)
)
pb_weather_filtered_df.to_sql('pb_weather_filtered', conn, if_exists='replace', index=False)

# Step 2: collapse the filtered rows to one row per Date.
# NOTE(review): the query does not say which row is kept for each date.
pb_sql_grooper = '''SELECT * FROM pb_weather_filtered GROUP BY Date'''
pb_weather_grouped_df = pd.read_sql(pb_sql_grooper, conn)
pb_weather_grouped_df.to_sql('pb_weather_grouped', conn, if_exists='replace', index=False)

# Step 3: join every draw with the weather row that shares its Date.
pb_sql_join = '''SELECT * FROM pb_draw JOIN pb_weather_grouped USING (Date)'''
pb_join_df = pd.read_sql(pb_sql_join, conn)
pb_join_df.to_sql('pb_join', conn, if_exists='replace', index=False)

print(pb_weather_filtered_df)
print(pb_weather_grouped_df)
print(pb_join_df)

In [None]:
%%HTML
<div class='tableauPlaceholder' id='viz1679398882113' style='position: relative'><noscript><a href='#'><img alt='Dashboard 1 ' src='https:&#47;&#47;public.tableau.com&#47;static&#47;images&#47;Me&#47;MegamillionsNumbers&#47;Dashboard1&#47;1_rss.png' style='border: none' /></a></noscript><object class='tableauViz'  style='display:none;'><param name='host_url' value='https%3A%2F%2Fpublic.tableau.com%2F' /> <param name='embed_code_version' value='3' /> <param name='site_root' value='' /><param name='name' value='MegamillionsNumbers&#47;Dashboard1' /><param name='tabs' value='no' /><param name='toolbar' value='yes' /><param name='static_image' value='https:&#47;&#47;public.tableau.com&#47;static&#47;images&#47;Me&#47;MegamillionsNumbers&#47;Dashboard1&#47;1.png' /> <param name='animate_transition' value='yes' /><param name='display_static_image' value='yes' /><param name='display_spinner' value='yes' /><param name='display_overlay' value='yes' /><param name='display_count' value='yes' /><param name='language' value='en-US' /></object></div>                <script type='text/javascript'>                    var divElement = document.getElementById('viz1679398882113');                    var vizElement = divElement.getElementsByTagName('object')[0];                    if ( divElement.offsetWidth > 800 ) { vizElement.style.minWidth='420px';vizElement.style.maxWidth='650px';vizElement.style.width='100%';vizElement.style.minHeight='587px';vizElement.style.maxHeight='887px';vizElement.style.height=(divElement.offsetWidth*0.75)+'px';} else if ( divElement.offsetWidth > 500 ) { vizElement.style.minWidth='420px';vizElement.style.maxWidth='650px';vizElement.style.width='100%';vizElement.style.minHeight='587px';vizElement.style.maxHeight='887px';vizElement.style.height=(divElement.offsetWidth*0.75)+'px';} else { vizElement.style.width='100%';vizElement.style.height='1677px';}                     var scriptElement = document.createElement('script');                    
scriptElement.src = 'https://public.tableau.com/javascripts/api/viz_v1.js';                    vizElement.parentNode.insertBefore(scriptElement, vizElement);                </script>

In [None]:
# Collect the distinct dates on which a Mega Millions weather row has
# DBTemp == 42 and RHumid == 73, formatted as MM/DD/YYYY strings.
weather_filter_df = mega_weather_df.query('DBTemp == 42 & RHumid == 73')
weather_date_unique = weather_filter_df['Date'].unique()

# One row per distinct date, held as a datetime column.
weather_date_unique_df = pd.DataFrame(
    {'date_time': pd.to_datetime(weather_date_unique)}
)

# 'time_only' is the time-of-day part and 'draw_date' the formatted calendar
# day of each distinct date.
weather_date_unique_df['time_only'] = weather_date_unique_df['date_time'].dt.time
weather_date_unique_df['draw_date'] = weather_date_unique_df['date_time'].dt.strftime('%m/%d/%Y')

dates_list = weather_date_unique_df['draw_date'].tolist()

# Bare expression: shown as the cell's output.
dates_list


In [None]:
# Flag draws in which two neighbouring columns differ by exactly 1.
# NOTE(review): the five columns are picked by position (2..6); presumably
# these are num_1..num_5 - confirm against the CSV's column order.
arr = mega_draw_df.iloc[:, 2:7].to_numpy()

# np.diff along axis 1 gives right-minus-left for each adjacent column pair;
# 'check' is 1 when any of those differences equals 1, otherwise 0.
adjacent_gap_is_one = np.diff(arr, axis=1) == 1
mega_draw_df['check'] = adjacent_gap_is_one.any(axis=1).astype(int)

# Print the complete frame without row/column truncation.
with pd.option_context(
    'display.max_rows', None,
    'display.max_columns', None,
    'display.precision', 3,
):
    print(mega_draw_df)

# Bare expression: shown as the cell's output.
mega_draw_df.dtypes

In [None]:
# Count how often each number was drawn, per ball position, in December draws.
# The load cell renames 'draw_date' to 'Date' and names the bonus-ball column
# 'mb', so those are the names that exist on mega_draw_df at this point.
mega_draw_df['Date'] = pd.to_datetime(mega_draw_df['Date'])
date_df = mega_draw_df[mega_draw_df['Date'].dt.month == 12]

counts_1 = date_df['num_1'].value_counts()
counts_2 = date_df['num_2'].value_counts()
counts_3 = date_df['num_3'].value_counts()
counts_4 = date_df['num_4'].value_counts()
counts_5 = date_df['num_5'].value_counts()
counts_MB = date_df['mb'].value_counts()

# `keys` names each column after its ball position; since pandas 2.0 the
# value_counts() results would otherwise all be called "count".
counts_df = pd.concat(
    [counts_1, counts_2, counts_3, counts_4, counts_5, counts_MB],
    axis=1,
    keys=['num_1', 'num_2', 'num_3', 'num_4', 'num_5', 'mb'],
)

# Print the complete table without row/column truncation.
with pd.option_context('display.max_rows', None,
                       'display.max_columns', None,
                       'display.precision', 3,
                       ):
    print(counts_df)
    

In [None]:
# Keep only the draws that took place on one of the dates in dates_list.
# dates_list holds 'MM/DD/YYYY' strings while mega_draw_df['Date'] is a
# datetime column, so the strings are parsed first and the comparison is done
# datetime-to-datetime. (The date column is 'Date'; the load cell renames
# 'draw_date'.)
weather_match_dates = pd.to_datetime(dates_list, format='%m/%d/%Y')
df2 = mega_draw_df[mega_draw_df['Date'].isin(weather_match_dates)]

# Print the complete frame without row/column truncation.
with pd.option_context('display.max_rows', None,
                       'display.max_columns', None,
                       'display.precision', 3,
                       ):
    print(df2)



In [None]:
# Rank the numbers drawn in df2: main numbers by their total count across the
# five positions, the bonus ball ('mb' column) by its own count.
counts_1 = df2['num_1'].value_counts()
counts_2 = df2['num_2'].value_counts()
counts_3 = df2['num_3'].value_counts()
counts_4 = df2['num_4'].value_counts()
counts_5 = df2['num_5'].value_counts()
counts_MB = df2['mb'].value_counts()

# Give the count column an explicit name so sorting does not depend on what
# value_counts() calls its result (it is "count" since pandas 2.0).
counts_MB_df = counts_MB.to_frame(name='mb')
sorted_MB = counts_MB_df.sort_values('mb', ascending=False)

# One column per ball position, indexed by the drawn number.
counts_df = pd.concat(
    [counts_1, counts_2, counts_3, counts_4, counts_5],
    axis=1,
    keys=['num_1', 'num_2', 'num_3', 'num_4', 'num_5'],
)
# Positions in which a number never appeared are NaN and are skipped.
counts_df['totals'] = counts_df.sum(axis=1, skipna=True)
sorted_df = counts_df.sort_values('totals', ascending=False)

# Print both rankings without row/column truncation.
with pd.option_context('display.max_rows', None,
                       'display.max_columns', None,
                       'display.precision', 3,
                       ):
    print(sorted_df)
    print(sorted_MB)

In [None]:
# The index of each ranking holds the drawn numbers themselves; pull them out
# as arrays of candidates for the main numbers and for the bonus ball.
draw_list = sorted_df.index.to_numpy()
draw_list_MB = sorted_MB.index.to_numpy()

print(draw_list)
print(draw_list_MB)

# Bare expression: shown as the cell's output.
type(draw_list)

In [None]:
# Pick a ticket at random: five distinct main numbers (sorted ascending)
# followed by one bonus-ball number, drawn from the candidate lists.
# Candidates are converted to plain Python ints so the printed ticket shows
# bare numbers instead of NumPy scalar reprs.
draw_list = [int(number) for number in draw_list]
draw_list_MB = [int(number) for number in draw_list_MB]

# random.sample() needs at least as many candidates as it is asked to pick;
# fail with a message that says which pool is too small.
if len(draw_list) < 5:
    raise ValueError(
        f"need at least 5 candidate numbers to pick from, got {len(draw_list)}"
    )
if not draw_list_MB:
    raise ValueError("need at least 1 candidate bonus-ball number to pick from")

new_list1 = sorted(random.sample(draw_list, 5))
new_list_MB = random.sample(draw_list_MB, 1)

print(new_list1 + new_list_MB)

# Bare expression: shown as the cell's output.
type(new_list1[1])