### Import Libraries

In [1]:
import pyodbc
import pandas as pd
import os
import time
import numpy as np

### Get Path to the Excel Files and Sql File

In [2]:
# root directory of the assessment
root_dir = os.getcwd()     

# this is path to the dataset
dataset = os.path.join(root_dir, "dataset")

# load excel dataset
dsr_data = os.path.join(dataset, "DSR.xlsx")
farms_data = os.path.join(dataset, "farms.xlsx")
producers_data = os.path.join(dataset, "producers.xlsx")
varieties_data = os.path.join(dataset, "varieties.xlsx")
vessels_data = os.path.join(dataset, "vessels.xlsx")

# loop through all the drivers we have access to
for driver in pyodbc.drivers():
    print(driver)

SQL Server
SQL Server Native Client 11.0
ODBC Driver 17 for SQL Server
SQL Server Native Client RDA 11.0


### Connecting to the Databases

In [3]:
class ConnectDB(object):
    """
    This is a blueprint for codnnecting to the local databases
    """
    
    def __init__(self):
        self.server = "DESKTOP-E5PL80T\SQLEXPRESS"
        self.database = "Atom_Assessment_2021"
    
    def get_connection(self):
        """
        This method connects to the database
        :returns:
            cur, conn
        """

        try:
            # connect to the database
            conn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server}; \
                   SERVER=' + self.server + '; \
                   DATABASE=' + self.database +';\
                   Trusted_Connection=yes;')
            cursor = conn.cursor()
            print("Connecion created")
        
        except Exception as e:
                
            print("Error: Could not get the cursor to the database")
            print(e)
        
        return conn, cursor      

In [4]:
# start time
start_time = time.time()

# define db connect object
connect_db_object = ConnectDB()

# establish the connection 
conn, cursor = connect_db_object.get_connection()
print("Time to connect to the DB: ",round((time.time() - start_time)/60,5),"minutes")

Connecion created
Time to connect to the DB:  0.00326 minutes


### Display Table Names from the Server

In [5]:
# sql quecry for displaying all tables from the databases
cursor.execute("SELECT table_name FROM information_schema.tables;")

# fetch all tables
tables  = cursor.fetchall()

# iterate through the database and display all table names
for table_name in tables:
    print(table_name)

('DSR', )
('Farms', )
('Producers', )
('Varieties', )
('Vessels', )
('sysdiagrams', )


### Applying Data Preprocessing

In [6]:
class DataPreprocessing(object):
    """
    This is a blueprint for loading dataset
    """
    def __init__(self):
        pass
    
    def get_excel_data(self, file_name):
        """
        This method loads excel dataset
        
        args:
            file_name: the name of the file
            excel_file: file to import into the sql database
            
        returns: 
            dataframe
        """
        pd.set_option("display.max_columns", None)
        df = pd.read_excel(open(file_name,'rb'))
        
        return df
    
#     def load_data_to_db(self, tb_name, conn, data):
#         """
#         This method loads excel data to the dabase
        
#         """
        
#         # data.to_sql('book_details', con = engine, if_exists = 'append', chunksize = 1000)
        
#         sql_insert = "INSERT INTO "
#         data.to_sql(tb_name , con = conn, if_exists = "append")
#         print("Successfully imported {} data into the database".format( data))
        
    def get_preprocessing(self, df):
        """
        This method prints the sum of null values and replace null values with 0 for easier processing.
        
        arg:
        --- df: this is an excel dataframe with null values
        
        returns:
        --- df: preprocesses excel dataframe 
        """
        
        # compute the sum of null values in the dataframe
        sum_of_null_values = df.isnull().sum()
            
        print("Sum of null values in the dataframe:")    
        print(sum_of_null_values)
        
        # replaced inf values with nan and it worked
        df.replace([np.inf, -np.inf], np.nan, inplace = True)
        
        # fill ann null values with zeros 
        df = df.fillna(0)
        
        return df
    
    def check_if_table_is_empty(self,excel_data, table, query):
        """
        This method check if the table is empty if not then print that it is not empty.
        
        args:
        --- table: this is a table from the database
        --- query: sql query to execute
        --- excel_data: this is the excel data to import into the SQL database
        
     
        """
        
        # sql query
        query_check = "select count(1) where exists (select * from {})".format(table) 

        sql_query = pd.read_sql_query(query_check, conn)
        df = pd.DataFrame(sql_query)   
        df.style.hide_index()

        # check the table is empty
        if df.empty:
            
            start_time = time.time()
            # interate through every row and insert into the database
            for row in excel_data.itertuples(index=False):
                # 
                cursor.execute(query,row)
            
            # Save the changes
            conn.commit()
            print("Successfully populated the SQL database")
        else:
            print("The {} table is NOT empty.".format(table))

### Load Detailed Season Report (DSR) Raw Dataset

In [7]:
# excel data object
excel_data_object = DataPreprocessing()

# load dsr dataframe 
dsr_df = excel_data_object.get_excel_data(dsr_data)

print("Time to load DSR data: ",round((time.time() - start_time)/60,5),"minutes")

Time to load DSR data:  2.71213 minutes


In [8]:
# initialize preprocessing object
preprocessing_object = DataPreprocessing()

# get DRS dataframe
dsr_df = preprocessing_object.get_preprocessing(dsr_df)

# visualize DSR raw dataset
dsr_df.head()

Sum of null values in the dataframe:
Year                       0
ProducerID                 0
FarmID                     0
Sales Week                 0
Pallet ID                  0
Sale ID                    0
Barcode                    0
Grade Code                 0
Mark Code                  0
VarietyID                  0
Pack Code                  0
Count Code                 2
Exchange Rate              0
Currency                   0
QC                         0
Sequence Number            0
Production ID            209
Run Number             96551
Container No           55337
VesselID                   0
Port Of Discharge      42479
Target Market              0
Inventory Code           587
Trader ID                  0
Pallet Size                0
From Barcode             211
To Barcode                 0
Invoice ID                 0
Selected                 209
No Cartons                 0
Advance Price              0
Producer Adjustment        0
Advance Purchase          44
Purcha

Unnamed: 0,Year,ProducerID,FarmID,Sales Week,Pallet ID,Sale ID,Barcode,Grade Code,Mark Code,VarietyID,Pack Code,Count Code,Exchange Rate,Currency,QC,Sequence Number,Production ID,Run Number,Container No,VesselID,Port Of Discharge,Target Market,Inventory Code,Trader ID,Pallet Size,From Barcode,To Barcode,Invoice ID,Selected,No Cartons,Advance Price,Producer Adjustment,Advance Purchase,Purchase Price,Final Price,Total,Debit Note,Credit Note,Total Local Cost,Total Export Cost,Total Weight,Pack Nett Weight,Nett Weight,Pack Gross Weight,Gross Weight,FOB,DIP,VAT,Return To Grower
0,2017,201100,45,24,201233,3124,960091600127485952,1,AMA,74,A15C,72,13.424682,USD,False,1,291.0,0,CXRU1499008,6798,BDCGP,ME,WA,TRADER02,1.0,960091600127485952,960091600127485952,6100,1.0,88,0,0,0.0,0.0,10.5,924.0,0.0,0.0,672.090194,0.0,0,16.0,1280.0,16.95,1356.0,10.5,139.324307,-55.907373,10074.41044
1,2017,201100,45,25,207565,10375,960091600127457024,1,AMA,74,A15C,64,13.21,USD,False,1,291.0,0,MWCU6671193,2276,SAJED,ME,WA,TRADER02,1.0,960091600127457024,960091600127457024,11024,1.0,91,0,0,0.0,0.0,10.5,955.5,0.0,0.0,701.575766,0.0,0,16.0,1280.0,16.95,1356.0,10.5,137.090203,-47.779393,9875.083022
2,2017,201100,45,25,100285,10375,960091600127485952,1,AMA,74,A15C,64,13.21,USD,False,1,291.0,0,MWCU6671193,2276,SAJED,ME,WA,TRADER02,1.0,960091600127485952,960091600127485952,11024,1.0,93,0,0,0.0,0.0,10.5,976.5,0.0,0.0,701.575766,0.0,0,16.0,1280.0,16.95,1356.0,10.5,137.090203,52.220607,9875.083022
3,2017,201100,45,25,181685,7374,960091600127457024,1,AMA,74,A15C,64,13.583237,USD,False,1,291.0,0,PONU4875876,2276,SAJED,ME,WA,TRADER02,1.0,960091600127457024,960091600127457024,2961,1.0,89,0,0,0.0,0.0,10.5,934.5,0.0,0.0,754.994818,0.0,0,16.0,1280.0,16.95,1356.0,10.5,141.074018,-40.300725,10122.17772
4,2017,201100,45,26,66238,6697,960091600127469952,1,AMA,700,A15C,125,13.41151,USD,False,1,291.0,0,PONU4883819,1337,SAJED,ME,WA,TRADER02,1.0,960091600127469952,960091600127469952,15710,1.0,85,0,0,0.0,0.0,7.0,595.0,0.0,0.0,548.241212,0.0,0,16.0,1280.0,16.95,1356.0,7.0,92.381472,73.75377,6614.094181


### Load Farms Raw Dataset

In [9]:
# start time
start_time = time.time()

# loads farms raw dataset
farms_df  = excel_data_object.get_excel_data(farms_data)

print("Time to loads farms data: ",round((time.time() - start_time)/60,5),"minutes")

Time to loads farms data:  0.00042 minutes


In [10]:
# get Farms dataframe
farms_df = preprocessing_object.get_preprocessing(farms_df)

# visualize Farms raw dataset
farms_df.head()

Sum of null values in the dataframe:
ID           0
Farm Name    0
dtype: int64


Unnamed: 0,ID,Farm Name
0,45,Tel Dan
1,54,Urkish
2,62,Lehi
3,83,Shur
4,154,Havilah


### Load Producers Raw Dataset

In [11]:
# start time
start_time = time.time()

# loads producers raw dataset
producers_df  =  excel_data_object.get_excel_data(producers_data)

print("Time to load producers: ",round((time.time() - start_time)/60,5),"minutes")

Time to load producers:  0.00041 minutes


In [12]:
# get Producers dataframe
producers_df = preprocessing_object.get_preprocessing(producers_df)

# visualize Producers raw dataset
producers_df.head()

Sum of null values in the dataframe:
ID          0
Producer    1
dtype: int64


Unnamed: 0,ID,Producer
0,201100,Alderaan
1,318820,Yavin IV
2,351986,Hoth
3,240822,Dagobah
4,11959,Bespin


### Load Varieties Raw Dataset

In [13]:
# start time
start_time = time.time()

# loads varieties raw dataset
varieties_df  = excel_data_object.get_excel_data(varieties_data)

print("Time to connect to the DB: ",round((time.time() - start_time)/60,5),"minutes")

Time to connect to the DB:  0.00064 minutes


In [14]:
# get Varieties dataframe
varieties_df = preprocessing_object.get_preprocessing(varieties_df)

# visualize Varieties raw dataset
varieties_df.head()

Sum of null values in the dataframe:
ID                 0
Variety Group     16
Variety Code       0
Commodity Code     0
dtype: int64


Unnamed: 0,ID,Variety Group,Variety Code,Commodity Code
0,45,AGN,AGN,OR
1,965,ALN,ALN,OR
2,937,ANL,ANL,OR
3,610,BAR,BAR,GR
4,686,BIN,ELL,SC


### Load Vessels Raw Dataset

In [15]:
# start time
start_time = time.time()

# loads vessels raw dataset
vessels_df = excel_data_object.get_excel_data(vessels_data)

print("Time to connect to the DB: ",round((time.time() - start_time)/60,5),"minutes")

Time to connect to the DB:  0.00078 minutes


In [16]:
# get vessels dataframe
vessels_df = preprocessing_object.get_preprocessing(vessels_df)

# visualize vessels raw dataset
vessels_df.head()

Sum of null values in the dataframe:
ID        0
Vessel    1
dtype: int64


Unnamed: 0,ID,Vessel
0,4271,ADRIAN SCHULTE
1,1347,ALEXANDRA
2,2653,ALMAVIVA
3,4496,ALS Venus
4,6744,AMANDA D


## 1. Import the Excel Data into a SQL Database 

#### Import Detailed Season Report (DSR)

In [17]:
# table in the database
dsr_db_table = "db.DSR"

# excel data object
excel_data_object = DataPreprocessing()

# sql query to insert into the table 
query = "INSERT INTO {} VALUES (?,?,?,?,?,?,?,?,?,?,\
                                               ?,?,?,?,?,?,?,?,?,?,\
                                               ?,?,?,?,?,?,?,?,?,?,\
                                               ?,?,?,?,?,?,?,?,?,?,\
                                               ?,?,?,?,?,?,?,?,?)".format(dsr_db_table)

# get the method to check if the table is empty or not
excel_data_object.check_if_table_is_empty(dsr_df, dsr_db_table, query)

The db.DSR table is NOT empty.


#### Import Farms Dataset

In [18]:
# table in the database
farms_db_table = "db.Farms"

# excel data object
excel_data_object = DataPreprocessing()

# sql query to insert into the table 
query = "INSERT INTO {} VALUES (?,?)".format(farms_db_table)

# get the method to check if the table is empty or not
excel_data_object.check_if_table_is_empty(farms_df, farms_db_table, query)

The db.Farms table is NOT empty.


#### Import Producers Dataset

In [19]:
# table in the database
producers_db_table = "db.Producers"

# excel data object
excel_data_object = DataPreprocessing()

# sql query to insert into the table 
query = "INSERT INTO {} VALUES (?,?)".format(producers_db_table)

# get the method to check if the table is empty or not
excel_data_object.check_if_table_is_empty(producers_df, producers_db_table, query)

The db.Producers table is NOT empty.


#### Import Varieties Dataset

In [20]:
# table in the database
varieties_db_table = "db.Varieties"

# excel data object
excel_data_object = DataPreprocessing()

# sql query to insert into the table 
query = "INSERT INTO db.Varieties VALUES (?,?,?,?)".format(varieties_db_table)

# get the method to check if the table is empty or not
excel_data_object.check_if_table_is_empty(varieties_df, varieties_db_table, query)

The db.Varieties table is NOT empty.


#### Import Vessels Dataset

In [21]:
# table in the database
vessels_db_table = "db.Vessels"

# excel data object
excel_data_object = DataPreprocessing()

# sql query to insert into the table 
query = "INSERT INTO {} VALUES (?,?)".format(vessels_db_table)

# get the method to check if the table is empty or not
excel_data_object.check_if_table_is_empty(vessels_df, vessels_db_table, query)

The db.Vessels table is NOT empty.


In [22]:
class DatabaseData(object):
    """
    This is class blueprints that gets ethe data from the database and execute queries.
    """
    def __init__(self):
        self.conn = conn    
    
    def get_table_data(self, table_name):
        """
        This method retrieves records from the database
        
        arg:
        ---table_name: the table_name that we want to access
        
        return:
        --- table: table from fetched from the database
        """
        query = "SELECT * FROM {table}".format(table = table_name)

        table = pd.read_sql_query(query,self.conn)

        return table  
    
    def get_table_columns(self, table):
        """
        This method lists column names
        
        arg:
        --- table: this is a table
        
        returns:
        --- columns: lists of all columns
        """
        
        columns = table.columns.values.tolist()
        
        return columns

In [23]:
# initialiase table object
table_object = DatabaseData()

# get dsr data from the database
dsr_table = table_object.get_table_data("db.DSR")

# get the number of columns
dsr_columns = table_object.get_table_columns(dsr_table)

In [24]:
# initialiase table object
table_object = DatabaseData()

# get dsr data from the database
varieties_table = table_object.get_table_data("db.Varieties")

# get the number of columns
varieties_columns = table_object.get_table_columns(varieties_table)


## 2. Produce an overview of the exporter's business.

#### 2.1 How many producers supplies fruit to this exporter.
Assuming that each producer has a unique ID

In [26]:
table_name =  "db.DSR"
query = """SELECT COUNT(DISTINCT ProducerID) AS "PRODUCERS" FROM {table}""".format(table = table_name)
SQL_Query = pd.read_sql_query(query, conn)
df = pd.DataFrame(SQL_Query)   
df.style.hide_index()

PRODUCERS
56


#### 2.2 How many different varieties of fruit are being exported?
Assuming each fruit exported has a unique ID e.g. Banana 11, Apple 12 etc

In [27]:
table_name =  "db.DSR"
query = """SELECT COUNT (DISTINCT VarietyID) AS "NO OF DIFFERENT VARIETIES" FROM {table}""".format(table = table_name)
SQL_Query = pd.read_sql_query(query, conn)
df = pd.DataFrame(SQL_Query)   
df.style.hide_index()

NO OF DIFFERENT VARIETIES
94


#### 2.3 How many tons per variety are being supplied to the exporter?

In [28]:
table_name =  "db.DSR"
query = """SELECT VarietyID, COUNT("Gross Weight") AS "Tons" FROM {table} GROUP BY VarietyID, "To Barcode";""".format(table = table_name)
SQL_Query = pd.read_sql_query(query, conn)
df = pd.DataFrame(SQL_Query)   
df.style.hide_index()

VarietyID,Tons
806,3
74,3
251,39
393,246
806,9
806,57
628,66
177,9
937,21
370,165


#### 2.4 What is the average return are growers getting per standard carton?

In [29]:
table_name =  "db.DSR"
query = """SELECT "No Cartons", AVG("Return To Grower") AS "AVERAGE" FROM {table} GROUP BY "No Cartons";""".format(table = table_name)
SQL_Query = pd.read_sql_query(query, conn)
df = pd.DataFrame(SQL_Query)   
df.style.hide_index()

No Cartons,AVERAGE
261,5726.77946
23,2054.313688
238,11054.383911
355,12780.327989
570,23549.477555
46,4569.357666
378,16679.575722
215,9097.118908
2207,44938.445197
69,10805.197287


#### 2.5 What are the most prominent Target Markets that fruit are exported to?

In [30]:
table_name =  "db.DSR"
query = """SELECT  "Target Market", COUNT("Target Market") AS "FREQUENCY"\
FROM {table} GROUP BY "Target Market" ORDER BY "FREQUENCY" DESC;""".format(table = table_name)
SQL_Query = pd.read_sql_query(query, conn)
df = pd.DataFrame(SQL_Query)   
df.style.hide_index()

Target Market,FREQUENCY
EU,123405
LO,120249
NI,84879
NL,34857
ME,29340
UK,25854
BE,9849
FE,7818
SA,6015
CA,3906


## 3. Provide an overview of the producer named Corellia's.
Note: The ProducerID is a foreign key in DSR table which references the ID in the Producer's Table. For this Question we use 
Corellia's ID to filter out other Producers. 

In [37]:
query = """SELECT * FROM db.DSR WHERE ProducerID = 108607;"""
corellia_df = pd.read_sql_query(query, conn)
corellia_df
#corellia_df = pd.DataFrame(SQL_Query)  
#corellia_df.style.hide_index()

Unnamed: 0,ID,Year,ProducerID,FarmID,Sales Week,Pallet ID,Sale ID,Barcode,Grade Code,Mark Code,VarietyID,Pack Code,Count Code,Exchange Rate,Currency,QC,Sequence Number,Production ID,Run Number,Container No,VesselID,Port Of Discharge,Target Market,Inventory Code,Trader ID,Pallet Size,From Barcode,To Barcode,Invoice ID,Selected,No Cartons,Advance Price,Producer Adjustment,Advance Purchase,Purchase Price,Final Price,Total,Debit Note,Credit Note,Total Local Cost,Total Export Cost,Total Weight,Pack Nett Weight,Nett Weight,Pack Gross Weight,Gross Weight,FOB,DIP,VAT,Return To Grower
0,20456,2020,108607,1321,39,154881,4395,60091600190475200,P,IVO,441,J60B,M,16.002402,USD,False,1,0.0,215,MNBU3497975,179,HKHKG,EU,UL,TRADER02,1.0,60091600190475200,60091600190475200,12465,0.0,6,0,0,400.0,0.0,276.00,1656.00,0.0,0.0,1375.160784,0.0,0,600.00,1200.0,660.00,1320.0,276.00,4039.832562,52.749968,7085.257072
1,20457,2020,108607,1321,39,171054,4395,60091600190475200,P,IVO,441,J60B,M,16.002402,USD,False,1,0.0,215,MNBU3497975,179,HKHKG,EU,UL,TRADER02,1.0,60091600190475200,60091600190475200,12465,0.0,9,0,0,400.0,0.0,276.00,2484.00,0.0,0.0,1375.160784,0.0,0,600.00,1200.0,660.00,1320.0,276.00,4039.832562,154.749968,7085.257072
2,20458,2020,108607,1321,39,66701,4395,60091600190475200,P,IVO,441,J60B,M,16.002402,USD,False,1,0.0,215,MNBU3497975,179,HKHKG,EU,UL,TRADER02,1.0,60091600190475200,60091600190475200,12465,0.0,12,0,0,400.0,0.0,276.00,3312.00,0.0,0.0,1375.160784,0.0,0,600.00,1200.0,660.00,1320.0,276.00,4039.832562,184.749968,7085.257072
3,20498,2020,108607,1321,40,71844,4241,60091600190475104,P,IVO,441,J60B,S,19.454511,EUR,False,1,0.0,215,MEDU9156240,7071,SESTO,EU,UL,TRADER02,1.0,60091600190475104,60091600190475104,16181,0.0,6,0,0,400.0,0.0,276.00,1656.00,0.0,0.0,3637.274699,0.0,0,600.00,1200.0,660.00,1320.0,276.00,3890.140989,7.325005,6746.534514
4,20499,2020,108607,1321,40,176520,4241,60091600190475104,P,IVO,441,J60B,S,19.454511,EUR,False,1,0.0,215,MEDU9156240,7071,SESTO,EU,UL,TRADER02,1.0,60091600190475104,60091600190475104,16181,0.0,6,0,0,400.0,0.0,276.00,1656.00,0.0,0.0,3637.274699,0.0,0,600.00,1200.0,660.00,1320.0,276.00,3890.140989,57.325005,6746.534514
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12529,456915,2016,108607,1321,43,65296,7272,560091600190483968,1,AMA,441,A15C,88,13.820000,USD,False,1,0.0,184,CGMU9407704,2909,GALBV,ME,WA,TRADER02,1.0,560091600190483968,560091600190483968,8167,0.0,87,0,0,0.0,0.0,10.35,900.45,0.0,0.0,1292.886461,0.0,0,15.65,1252.0,17.55,1404.0,10.35,141.561916,259.004105,9642.569862
12530,456916,2016,108607,1321,43,188665,7272,560091600190483968,1,AMA,441,A15C,88,13.820000,USD,False,1,0.0,184,CGMU9407704,2909,GALBV,ME,WA,TRADER02,1.0,560091600190483968,560091600190483968,8167,0.0,93,0,0,0.0,0.0,10.35,962.55,0.0,0.0,1292.886461,0.0,0,15.65,1252.0,17.55,1404.0,10.35,141.561916,254.004105,9642.569862
12531,456917,2016,108607,1321,43,183255,7272,560091600190483968,1,AMA,441,A15C,88,13.820000,USD,False,1,0.0,184,CGMU9407704,2909,GALBV,ME,WA,TRADER02,1.0,560091600190483968,560091600190483968,8167,0.0,85,0,0,0.0,0.0,10.35,879.75,0.0,0.0,1292.886461,0.0,0,15.65,1252.0,17.55,1404.0,10.35,141.561916,321.004105,9642.569862
12532,456918,2016,108607,1321,43,139035,7272,660091600190404992,1,AMA,441,A15C,88,13.820000,USD,False,1,0.0,184,CGMU9407704,2909,GALBV,ME,WA,TRADER02,1.0,660091600190404992,660091600190404992,8167,0.0,93,0,0,0.0,0.0,10.35,962.55,0.0,0.0,1292.886461,0.0,0,15.65,1252.0,17.55,1404.0,10.35,141.561916,235.004105,9642.569862


#### 3.2 What are the most prominent fruit variety groups, based on tons produced ?

In [73]:
query = """SELECT db.Varieties."Variety Group", COUNT("Variety Group") AS "Most Prominent"\
           FROM db.Varieties, db.DSR WHERE db.Varieties.ID = db.DSR.VarietyID AND ProducerID = 108607\
           GROUP BY db.Varieties."Variety Group", db.DSR."Gross Weight" ORDER BY COUNT(*) DESC;"""

sql_query = pd.read_sql_query(query, conn)
corellia_df = pd.DataFrame(sql_query)   
corellia_df.style.hide_index()

Variety Group,Most Prominent
CLE,1101
NAV,1089
VAL,843
VAL,705
ANL,576
NLT,390
VAL,381
NAV,354
0,348
VAL,345


#### 3.3 What return did the producer receive for 2020 in comparison with 2019?

In [102]:
query = """SELECT Year, "Return To Grower", LAG("Return To Grower") OVER ( ORDER BY Year ) AS "Revenue Previous Year"\
           FROM db.DSR WHERE Year IN (2019, 2020) AND ProducerID = 108607;"""

sql_query = pd.read_sql_query(query, conn)
corellia_df = pd.DataFrame(sql_query)   
corellia_df.style.hide_index()

Year,Return To Grower,Revenue Previous Year
2019,5664.249496,
2019,5408.824254,5664.249496
2019,11555.613382,5408.824254
2019,10440.327562,11555.613382
2019,16227.957461,10440.327562
2019,14000.26012,16227.957461
2019,14000.26012,14000.26012
2019,14000.26012,14000.26012
2019,14000.26012,14000.26012
2019,14000.26012,14000.26012


#### 3.4 What is the distribution of the quality grades of the producer's fruit?

#### 3.5 What volume of cartons by cont is parked for the producer?

#### 3.6 Show the same information for any of the other producers and across multiple years.

### 4. Provide an estimate of the year-end returns for the Mustafar for 2021 ?

### 5. What is the best performing farm ( of any producer) for this exporter?