/home/student/dodhiajk/CGC_Grain_Outcome_Predictions/.venv/bin/python: No match.


Note: you may need to restart the kernel to use updated packages.


In [62]:
import sys
import os
from dotenv import load_dotenv

import numpy as np
import pandas as pd
import sqlalchemy as sq

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

import tensorflow
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

sys.path.append("../Shared/")
from DataService import DataService

# fetch data from db

In [3]:
# function to update logs
def updateLog(fileName: str, message: str) -> None:
    """Append ``message`` followed by a newline to the file at ``fileName``.

    Args:
        fileName: Path of the log file, opened in append mode. When it is
            None the call does nothing at all.
        message: Text of the log entry.

    Any exception raised while opening or writing the file is swallowed and
    the message is printed to stdout instead, so logging never interrupts
    the caller.
    """
    # No log file configured: drop the message silently.
    if fileName is None:
        return

    try:
        with open(fileName, "a") as handle:
            handle.write(message + "\n")
    except Exception:
        # Best-effort fallback so the message is not lost entirely.
        print(message)

In [4]:
# File that updateLog appends to when the credential check below fails
LOG_FILE = "/data/pull_moisture.log"

# load_dotenv() runs first so that anything it adds to the environment is
# visible to the os.getenv lookups; each setting is None when it is not set.
load_dotenv()
PG_USER, PG_PW, PG_DB, PG_ADDR, PG_PORT = (
    os.getenv(key)
    for key in (
        "POSTGRES_USER",
        "POSTGRES_PW",
        "POSTGRES_DB",
        "POSTGRES_ADDR",
        "POSTGRES_PORT",
    )
)

In [5]:
# Refuse to continue unless every connection setting was found: log the
# problem, then abort the cell with a ValueError.
if any(
    setting is None
    for setting in (PG_DB, PG_ADDR, PG_PORT, PG_USER, PG_PW)
):
    updateLog(LOG_FILE, "Missing database credentials")
    raise ValueError("Environment variables are not set")

# connecting to database (the port is read as text and converted to int here)
db = DataService(PG_DB, PG_ADDR, int(PG_PORT), PG_USER, PG_PW)
conn = db.connect()

In [6]:
# Read every column and row of public.agg_soil_moisture into a DataFrame
query = sq.text("select * FROM public.agg_soil_moisture")
sm_df = pd.read_sql(sql=query, con=conn)

In [45]:
# Preview the first rows of the soil moisture frame as the cell output
sm_df.head()

Unnamed: 0,index,year,month,day,cr_num,district,soil_moisture_min,soil_moisture_max,soil_moisture_mean
0,0,1978,11,1,0,4612,0.1659,0.41871,0.237053
1,1,1978,11,1,5,4740,0.12714,0.207248,0.163722
2,2,1978,11,1,5,4741,0.153398,0.215304,0.170989
3,3,1978,11,1,7,4770,0.140772,0.18639,0.152211
4,4,1978,11,1,7,4771,0.11681,0.11681,0.11681


In [8]:
# Read every column and row of public.agg_ergot_sample into a DataFrame
query = sq.text("select * FROM public.agg_ergot_sample")
ergot_df = pd.read_sql(sql=query, con=conn)

In [19]:
# Drop the sample_id column. The result is re-assigned instead of mutating
# in place, and errors="ignore" keeps the cell safe to re-run: the original
# in-place drop raised KeyError on a second execution because the column
# was already gone.
ergot_df = ergot_df.drop(columns=["sample_id"], errors="ignore")

In [52]:
# Keep one copy of each fully identical row; the original index labels are
# retained (no reset), which is why the displayed index has gaps.
ergot_df = ergot_df.drop_duplicates()

In [53]:
# Show ergot_df as the cell output
ergot_df

Unnamed: 0,year,province,crop_district,incidence,severity,district,percnt_true,has_ergot,sum_severity,present_prev1,present_prev2,present_prev3,present_in_neighbor,severity_prev1,severity_prev2,severity_prev3,severity_in_neighbor
0,1995,AB,1,False,0.000,4810,0.000000,False,0.000,False,False,False,False,0.000000,0.000000,0.000000,0.000000
48,1995,AB,2,False,0.000,4820,0.000000,False,0.000,False,False,False,True,0.000000,0.000000,0.000000,0.005747
190,1995,AB,3,False,0.000,4830,0.010582,True,11.000,False,False,False,True,0.000000,0.000000,0.000000,0.001817
230,1995,AB,4,False,0.000,4840,0.006017,True,15.000,False,False,False,False,0.000000,0.000000,0.000000,0.000000
280,1995,AB,4,True,1.000,4840,0.006017,True,15.000,False,False,False,False,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
158540,2022,AB,3,True,0.260,4830,0.419355,True,1.270,True,True,True,True,0.025641,0.513369,0.083333,0.279330
158636,2022,SK,72,True,0.025,4771,0.333333,True,0.726,False,True,True,True,0.000000,0.153333,0.013072,0.366292
158655,2022,MB,3,True,0.000,4603,0.535354,True,1.360,False,True,True,True,0.000000,0.309353,0.043165,0.487047
158660,2022,SK,41,True,0.010,4740,0.111111,True,0.010,False,False,False,True,0.000000,0.000000,0.000000,0.162162


In [10]:
# TODO: pull weather data (this cell has no code yet)

In [54]:
# Inner join: keep only rows whose (year, district) pair appears in both the
# soil moisture frame and the ergot frame
df = sm_df.merge(ergot_df, on=["year", "district"], how="inner")

In [55]:
# (rows, columns) of the merged frame
df.shape

(353355, 24)

In [56]:
# Remove fully duplicated rows from the merged frame. Re-assigned rather than
# using inplace=True, matching how ergot_df is de-duplicated earlier in the
# notebook and avoiding in-place mutation of a frame other cells display.
df = df.drop_duplicates()

In [57]:
# (rows, columns) again, for comparison with the shape shown before drop_duplicates
df.shape

(353355, 24)

In [34]:
# Print each column's name, non-null count and dtype for the merged frame
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 353355 entries, 0 to 11632418
Data columns (total 24 columns):
 #   Column                Non-Null Count   Dtype  
---  ------                --------------   -----  
 0   index                 353355 non-null  int64  
 1   year                  353355 non-null  int64  
 2   month                 353355 non-null  int64  
 3   day                   353355 non-null  int64  
 4   cr_num                353355 non-null  int64  
 5   district              353355 non-null  int64  
 6   soil_moisture_min     353355 non-null  float64
 7   soil_moisture_max     353355 non-null  float64
 8   soil_moisture_mean    353355 non-null  float64
 9   province              353355 non-null  object 
 10  crop_district         353355 non-null  int64  
 11  incidence             353355 non-null  bool   
 12  severity              353355 non-null  float64
 13  percnt_true           353355 non-null  float64
 14  has_ergot             353355 non-null  bool   
 15

# split data

# creating model

In [63]:
# Stack of dense layers: 11 input features -> 11 relu units -> 11 relu units
# -> 1 sigmoid output. Layers are listed in the order they are applied.
model = Sequential(
    [
        Dense(11, activation="relu", input_dim=11),
        Dense(11, activation="relu"),
        Dense(1, activation="sigmoid"),
    ]
)

2023-07-01 15:18:39.120466: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-07-01 15:18:39.186643: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-07-01 15:18:39.186943: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

In [64]:
# Print the per-layer output shapes and parameter counts
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 11)                132       
                                                                 
 dense_1 (Dense)             (None, 11)                132       
                                                                 
 dense_2 (Dense)             (None, 1)                 12        
                                                                 
Total params: 276
Trainable params: 276
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, and accuracy
# reported as the metric
model.compile(
    optimizer="Adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)