# Load the data necessary for the tutorials
You are not expected to understand every aspect of this script, although reading through it is still recommended. You only need to change the Data Lake name and the SQL store name below. Once you have done that, simply run all cells in this script and the data will be available in your tenant. Note: this script should only be run once per tenant; otherwise you will receive errors about tables already existing. The tables are shared across all users.

### Set the Data Lake and SQL store name

In [None]:
# Names of the target stores in your tenant; edit these two values before
# running the remaining cells.
datalake_name = 'DataLakeName'    # Data Lake store that receives the final tables
sql_store_name = 'SqlStoreName'   # SQL store used for staging and the reference table

### Load the max temperature data

In [None]:
!curl -o IDCJAC0010_009021_1800_Data.csv https://raw.githubusercontent.com/SnowdenNeuroverse/NeuroTraining/master/Data/PerthWeatherStationData/IDCJAC0010_009021_1800_Data.csv
!curl -o IDCJAC0010_009106_1800_Data.csv https://raw.githubusercontent.com/SnowdenNeuroverse/NeuroTraining/master/Data/PerthWeatherStationData/IDCJAC0010_009106_1800_Data.csv
!curl -o IDCJAC0010_009215_1800_Data.csv https://raw.githubusercontent.com/SnowdenNeuroverse/NeuroTraining/master/Data/PerthWeatherStationData/IDCJAC0010_009215_1800_Data.csv
!curl -o IDCJAC0010_009225_1800_Data.csv https://raw.githubusercontent.com/SnowdenNeuroverse/NeuroTraining/master/Data/PerthWeatherStationData/IDCJAC0010_009225_1800_Data.csv
!curl -o IDCJAC0010_009265_1800_Data.csv https://raw.githubusercontent.com/SnowdenNeuroverse/NeuroTraining/master/Data/PerthWeatherStationData/IDCJAC0010_009265_1800_Data.csv

### Import libraries

In [None]:
from neuro_python.neuro_data import schema_manager as sm
from neuro_python.neuro_data import sql_commands as sc
from neuro_python.neuro_data import sql_query as sq
from neuro_python.neuro_compute import spark_manager as spm
import pandas as pd
import time

### Store Weather Data

In [None]:
# Column layout shared by every per-station max-temperature table.
cols=[sm.column_definition('StationNumber','Int64'),
      sm.column_definition('Year','Int64'),
      sm.column_definition('Month','Int64'),
      sm.column_definition('Day','Int64'),
      sm.column_definition('MaxTemp','Double'),
      sm.column_definition('Quality','String(5)')]
table_def=sm.table_definition(cols,'Processed',file_type='delta')

files=['IDCJAC0010_009021_1800_Data.csv','IDCJAC0010_009106_1800_Data.csv','IDCJAC0010_009215_1800_Data.csv','IDCJAC0010_009225_1800_Data.csv',
      'IDCJAC0010_009265_1800_Data.csv']

# Step 1: for each CSV, create the table in both stores and write the rows to
# the SQL store. The (station number, table name) pairs are remembered so the
# transfer step below works on exactly the tables that were staged here.
staged_tables=[]
for csv_file in files:
    df=pd.read_csv(csv_file)
    df=df.rename(columns={'Bureau of Meteorology station number':'StationNumber','Maximum temperature (Degree C)':'MaxTemp'})
    # Keep only the columns declared in the table definition, in the same order.
    df=df[['StationNumber', 'Year','Month','Day','MaxTemp','Quality']]
    # The table is named after the station number found in the file's first row
    # (positional lookup, so it does not depend on the index labels).
    station_number=df['StationNumber'].iloc[0]
    table_name='D3S_Training_%s_MaxTemp'%station_number
    sm.create_table(sql_store_name,table_name,table_def)
    sm.create_table(datalake_name,table_name,table_def)
    sc.df_to_sql(sql_store_name,table_name,df)
    staged_tables.append((station_number,table_name))
    print("Table %s written to sql"%table_name)

# Step 2: submit a single pass-through job ('df2=df1') and run it once per
# station, overriding the import/export tables each time. The tables given at
# submission are placeholders taken from the first staged station.
default_table=staged_tables[0][1]
job = spm.submit_job('TransferFromSqlToDataLake','df2=df1',
               import_tables=[spm.import_table('df1',sql_store_name,default_table)],
               export_tables=[spm.export_table('df2',datalake_name,default_table)])
try:
    for station_number,table_name in staged_tables:
        run=spm.run_job(job['JobId'],
                    'Test',
                    override_import_tables=[spm.import_table('df1',sql_store_name,table_name)],
                    override_export_tables=[spm.export_table('df2',datalake_name,table_name)])
        # Poll every 5 seconds until the run is no longer reported as 'Running'.
        # NOTE(review): any other status ends the wait, and the SQL table is then
        # deleted regardless of whether the run succeeded - confirm which statuses
        # list_runs can return and check for success before deleting.
        while spm.list_runs(job['JobId'],run_id=run['RunId'])[0]['Status']=='Running':
            time.sleep(5)
        sm.delete_processed_table(sql_store_name,table_name)
        print("Table %s written to datalake and sql table deleted"%station_number)
finally:
    # Always remove the job definition, even if a run, poll or delete above raised.
    spm.remove_job(job['JobId'])

### Create and store reference table

In [None]:
# Build the weather-station reference table: one row per station giving its
# number, name and latitude/longitude.
ref_table_name='D3S_Training_WeatherStations'
ref_df=pd.DataFrame(
    [(9021,'PERTH AIRPORT',-31.93,115.98),
     (9106,'GOSNELLS CITY',-32.05,115.98),
     (9215,'SWANBOURNE',-31.96,115.76),
     (9225,'PERTH METRO',-31.92,115.87),
     (9265,'HILLARYS BOAT HARBOUR NTC AWS',-31.83,115.74)],
    columns=['StationNumber','StationName','Lat','Lon'])

# Table definition listing the same four columns in the same order.
cols=[
    sm.column_definition('StationNumber','Int64'),
    sm.column_definition('StationName','String(50)'),
    sm.column_definition('Lat','Double'),
    sm.column_definition('Lon','Double'),
]
table_def=sm.table_definition(cols,'Processed')

# Create the table in the SQL store, then write the reference rows to it.
sm.create_table(sql_store_name,ref_table_name,table_def)
sc.df_to_sql(sql_store_name,ref_table_name,ref_df)