In [1]:
import pandas as pd
import fastparquet
import sqlite3

In [2]:
# Load the merged cow/feed table and reduce it to the diet information used for calculations

# Animal inputs that are dropped so that only the diet columns remain
columns_to_remove = [
    'lactation_number', 'days_in_milk', 'MY', 'weight', 'BW_smooth', 'BW_gain',
    'asfed_intake', 'DMI', 'bcs_value', 'Birth Date', 'Test Day Date',
    'Lact Start Date', 'Fat %', 'Protein %', 'SCC', 'Pregnancy Indicator',
    'Days to Last Breeding', 'days_preg', 'conception_date', 'age_m',
]

input_data = pd.read_parquet('../test_files/merged_cow_with_feed_7day_means_20230530_1557.parquet')

# Drop the animal columns first, then discard every row that has no 'sampleId'
# (i.e. rows missing diet information)
diet_data = (
    input_data
    .drop(columns=columns_to_remove)
    .dropna(subset=['sampleId'])
)

In [None]:
# Find all the unique diets being fed: one row per distinct (sampleId, reportDate) pair

diet_keys = diet_data[['sampleId', 'reportDate']]
unique_diets = diet_keys.drop_duplicates()
print(unique_diets)

In [None]:
# Setting up a database with sqlite3

# This connects to the specified database; if the database does not exist it will be created
conn = sqlite3.connect('../diet_database.db')
try:
    # Save the diet_data dataframe to the database table 'current_diets',
    # replacing the table if it already exists. pandas writes through the
    # connection directly, so no separate cursor is needed.
    diet_data.to_sql('current_diets', conn, if_exists='replace', index=False)
finally:
    # Close the connection even if the write raises, so it is never left open
    conn.close()

In [None]:
# Display a database table in python

conn = sqlite3.connect('../diet_database.db')
try:
    cursor = conn.cursor()

    # Fetch every row of the 'current_diets' table and print them one per line
    cursor.execute('SELECT * FROM current_diets')
    rows = cursor.fetchall()

    for row in rows:
        print(row)
finally:
    # Close the connection even if the query fails (e.g. the table does not exist yet)
    conn.close()

In [4]:
# Create dataframe with each unique diet

diet_key_columns = ['sampleId', 'reportDate']

# Flag the first row seen for every (sampleId, reportDate) pair.
# A boolean mask on diet_data is used instead of passing diet_data's index labels
# to input_data.iloc: labels are not positions, so that lookup only picks the right
# rows while input_data happens to carry a default 0..n-1 index.
is_first_occurrence = ~diet_data.duplicated(subset=diet_key_columns)
unique_diets = diet_data.loc[is_first_occurrence, diet_key_columns]

# Removing all non diet data.
# diet_data already has the animal inputs removed, so only these remaining
# non-diet columns still have to be dropped.
columns_to_remove = ['cow_id', 'date', 'DIM_bins_w']
current_diets = diet_data.loc[is_first_occurrence].drop(columns=columns_to_remove)



In [5]:
# Generate a unique ID for each diet

def assign_diet_id(row):
    """Build the diet identifier for one row.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) that can be indexed
            with 'sampleId' and 'reportDate'.

    Returns:
        str: The two values joined by an underscore, '<sampleId>_<reportDate>'.
    """
    sample_id = row['sampleId']
    report_date = row['reportDate']
    return "{}_{}".format(sample_id, report_date)

# Assign IDs to diet data

# Drop any 'Diet_ID' left over from a previous run of this cell first:
# DataFrame.insert raises ValueError when the column already exists, so without
# this the cell could only be executed once per kernel session.
current_diets = current_diets.drop(columns='Diet_ID', errors='ignore')

# Build one ID per row from its 'sampleId' and 'reportDate', then place it as the first column
diet_ids = current_diets.apply(assign_diet_id, axis=1)
current_diets.insert(0, 'Diet_ID', diet_ids)

# Assign IDs to animal data


In [6]:
# Save unique diets to database
conn = sqlite3.connect('../diet_database.db')
try:
    # Overwrite the 'current_diets' table with the de-duplicated diets.
    # pandas writes through the connection directly, so no separate cursor is needed.
    current_diets.to_sql('current_diets', conn, if_exists='replace', index=False)
finally:
    # Close the connection even if the write raises, so it is never left open
    conn.close()