### Import Libraries

In [1]:
import pyodbc
import pandas as pd
import os
import time
import numpy as np

In [2]:
# root directory of the assessment
root_dir = os.getcwd()     

# this is path to the dataset
dataset = os.path.join(root_dir, "dataset")

# load excel dataset
dsr_data = os.path.join(dataset, "DSR.xlsx")
farms_data = os.path.join(dataset, "farms.xlsx")
producers_data = os.path.join(dataset, "producers.xlsx")
varieties_data = os.path.join(dataset, "varieties.xlsx")
vessels_data = os.path.join(dataset, "vessels.xlsx")

# loop through all the drivers we have access to
for driver in pyodbc.drivers():
    print(driver)

SQL Server
SQL Server Native Client 11.0
ODBC Driver 17 for SQL Server
SQL Server Native Client RDA 11.0


### Connecting to the Databases

In [3]:
class ConnectDB(object):
    """
    This is a blueprint for codnnecting to the local databases
    """
    
    def __init__(self):
        self.server = "DESKTOP-E5PL80T\SQLEXPRESS"
        self.database = "Atom_Assessment_2021"
    
    def get_connection(self):
        """
        This method connects to the database
        :returns:
            cur, conn
        """

        try:
            # connect to the database
            conn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server}; \
                   SERVER=' + self.server + '; \
                   DATABASE=' + self.database +';\
                   Trusted_Connection=yes;')
            cursor = conn.cursor()
            print("Connecion created")
        
        except Exception as e:
                
            print("Error: Could not get the cursor to the database")
            print(e)
        
        return conn, cursor      

In [4]:
# start time
start_time = time.time()

# define db connect object
connect_db_object = ConnectDB()

# establish the connection 
conn, cursor = connect_db_object.get_connection()
print("Time to connect to the DB: ",round((time.time() - start_time)/60,5),"minutes")

Connecion created
Time to connect to the DB:  0.00161 minutes


### Display Table Names from the Server

In [5]:
# sql quecry for displaying all tables from the databases
cursor.execute("SELECT table_name FROM information_schema.tables;")

# fetch all tables
tables  = cursor.fetchall()

# iterate through the database and display all table names
for table_name in tables:
    print(table_name)

('DSR', )
('Farms', )
('Producers', )
('Varieties', )
('Vessels', )


### Load Excel Dataset

In [6]:
class DataPreprocessing(object):
    """
    This is a blueprint for loading dataset
    """
    def __init__(self):
        pass
    
    def get_excel_data(self, file_name):
        """
        This method loads excel dataset
        
        args:
            file_name: the name of the file
            excel_file: file to import into the sql database
            
        returns: 
            dataframe
        """
        pd.set_option("display.max_columns", None)
        df = pd.read_excel(open(file_name,'rb'))
        
        return df
    
#     def load_data_to_db(self, tb_name, conn, data):
#         """
#         This method loads excel data to the dabase
        
#         """
        
#         # data.to_sql('book_details', con = engine, if_exists = 'append', chunksize = 1000)
        
#         sql_insert = "INSERT INTO "
#         data.to_sql(tb_name , con = conn, if_exists = "append")
#         print("Successfully imported {} data into the database".format( data))
        
    def get_preprocessing(self, df):
        """
        This method prints the sum of null values and replace null values with 0 for easier processing.
        
        arg:
        --- df: this is an excel dataframe with null values
        
        returns:
        --- df: preprocesses excel dataframe 
        """
        
        # compute the sum of null values in the dataframe
        sum_of_null_values = df.isnull().sum()
            
        print("Sum of null values in the dataframe:")    
        print(sum_of_null_values)
        
        # replaced inf values with nan and it worked
        df.replace([np.inf, -np.inf], np.nan, inplace = True)
        
        # fill ann null values with zeros 
        df = df.fillna(0)
        
        return df

### Load Detailed Season Report (DSR) Raw Dataset

In [7]:
# excel data object
excel_data_object = DataPreprocessing()

# load dsr dataframe 
dsr_df = excel_data_object.get_excel_data(dsr_data)

print("Time to load DSR data: ",round((time.time() - start_time)/60,5),"minutes")

Time to load DSR data:  1.84504 minutes


In [8]:
# initialize preprocessing object
preprocessing_object = DataPreprocessing()

# get DRS dataframe
dsr_df = preprocessing_object.get_preprocessing(dsr_df)

# visualize DSR raw dataset
dsr_df.head()

Sum of null values in the dataframe:
Year                       0
ProducerID                 0
FarmID                     0
Sales Week                 0
Pallet ID                  0
Sale ID                    0
Barcode                    0
Grade Code                 0
Mark Code                  0
VarietyID                  0
Pack Code                  0
Count Code                 2
Exchange Rate              0
Currency                   0
QC                         0
Sequence Number            0
Production ID            209
Run Number             96551
Container No           55337
VesselID                   0
Port Of Discharge      42479
Target Market              0
Inventory Code           587
Trader ID                  0
Pallet Size                0
From Barcode             211
To Barcode                 0
Invoice ID                 0
Selected                 209
No Cartons                 0
Advance Price              0
Producer Adjustment        0
Advance Purchase          44
Purcha

Unnamed: 0,Year,ProducerID,FarmID,Sales Week,Pallet ID,Sale ID,Barcode,Grade Code,Mark Code,VarietyID,Pack Code,Count Code,Exchange Rate,Currency,QC,Sequence Number,Production ID,Run Number,Container No,VesselID,Port Of Discharge,Target Market,Inventory Code,Trader ID,Pallet Size,From Barcode,To Barcode,Invoice ID,Selected,No Cartons,Advance Price,Producer Adjustment,Advance Purchase,Purchase Price,Final Price,Total,Debit Note,Credit Note,Total Local Cost,Total Export Cost,Total Weight,Pack Nett Weight,Nett Weight,Pack Gross Weight,Gross Weight,FOB,DIP,VAT,Return To Grower
0,2017,201100,45,24,201233,3124,960091600127485952,1,AMA,74,A15C,72,13.424682,USD,False,1,291.0,0,CXRU1499008,6798,BDCGP,ME,WA,TRADER02,1.0,960091600127485952,960091600127485952,6100,1.0,88,0,0,0.0,0.0,10.5,924.0,0.0,0.0,672.090194,0.0,0,16.0,1280.0,16.95,1356.0,10.5,139.324307,-55.907373,10074.41044
1,2017,201100,45,25,207565,10375,960091600127457024,1,AMA,74,A15C,64,13.21,USD,False,1,291.0,0,MWCU6671193,2276,SAJED,ME,WA,TRADER02,1.0,960091600127457024,960091600127457024,11024,1.0,91,0,0,0.0,0.0,10.5,955.5,0.0,0.0,701.575766,0.0,0,16.0,1280.0,16.95,1356.0,10.5,137.090203,-47.779393,9875.083022
2,2017,201100,45,25,100285,10375,960091600127485952,1,AMA,74,A15C,64,13.21,USD,False,1,291.0,0,MWCU6671193,2276,SAJED,ME,WA,TRADER02,1.0,960091600127485952,960091600127485952,11024,1.0,93,0,0,0.0,0.0,10.5,976.5,0.0,0.0,701.575766,0.0,0,16.0,1280.0,16.95,1356.0,10.5,137.090203,52.220607,9875.083022
3,2017,201100,45,25,181685,7374,960091600127457024,1,AMA,74,A15C,64,13.583237,USD,False,1,291.0,0,PONU4875876,2276,SAJED,ME,WA,TRADER02,1.0,960091600127457024,960091600127457024,2961,1.0,89,0,0,0.0,0.0,10.5,934.5,0.0,0.0,754.994818,0.0,0,16.0,1280.0,16.95,1356.0,10.5,141.074018,-40.300725,10122.17772
4,2017,201100,45,26,66238,6697,960091600127469952,1,AMA,700,A15C,125,13.41151,USD,False,1,291.0,0,PONU4883819,1337,SAJED,ME,WA,TRADER02,1.0,960091600127469952,960091600127469952,15710,1.0,85,0,0,0.0,0.0,7.0,595.0,0.0,0.0,548.241212,0.0,0,16.0,1280.0,16.95,1356.0,7.0,92.381472,73.75377,6614.094181


### Load Farms Raw Dataset

In [9]:
# start time
start_time = time.time()

# loads farms raw dataset
farms_df  = excel_data_object.get_excel_data(farms_data)

print("Time to loads farms data: ",round((time.time() - start_time)/60,5),"minutes")

Time to loads farms data:  0.00053 minutes


In [10]:
# get Farms dataframe
farms_df = preprocessing_object.get_preprocessing(farms_df)

# visualize Farms raw dataset
farms_df.head()

Sum of null values in the dataframe:
ID           0
Farm Name    0
dtype: int64


Unnamed: 0,ID,Farm Name
0,45,Tel Dan
1,54,Urkish
2,62,Lehi
3,83,Shur
4,154,Havilah


### Load Producers Raw Dataset

In [11]:
# start time
start_time = time.time()

# loads producers raw dataset
producers_df  =  excel_data_object.get_excel_data(producers_data)

print("Time to load producers: ",round((time.time() - start_time)/60,5),"minutes")

Time to load producers:  0.00042 minutes


In [12]:
# get Producers dataframe
producers_df = preprocessing_object.get_preprocessing(producers_df)

# visualize Producers raw dataset
producers_df.head()

Sum of null values in the dataframe:
ID          0
Producer    1
dtype: int64


Unnamed: 0,ID,Producer
0,201100,Alderaan
1,318820,Yavin IV
2,351986,Hoth
3,240822,Dagobah
4,11959,Bespin


### Load Varieties Raw Dataset

In [13]:
# start time
start_time = time.time()

# loads varieties raw dataset
varieties_df  = excel_data_object.get_excel_data(varieties_data)

print("Time to connect to the DB: ",round((time.time() - start_time)/60,5),"minutes")

Time to connect to the DB:  0.0006 minutes


In [14]:
# get Varieties dataframe
varieties_df = preprocessing_object.get_preprocessing(varieties_df)

# visualize Varieties raw dataset
varieties_df.head()

Sum of null values in the dataframe:
ID                 0
Variety Group     16
Variety Code       0
Commodity Code     0
dtype: int64


Unnamed: 0,ID,Variety Group,Variety Code,Commodity Code
0,45,AGN,AGN,OR
1,965,ALN,ALN,OR
2,937,ANL,ANL,OR
3,610,BAR,BAR,GR
4,686,BIN,ELL,SC


### Load Vessels Raw Dataset

In [15]:
# start time
start_time = time.time()

# loads vessels raw dataset
vessels_df = excel_data_object.get_excel_data(vessels_data)

print("Time to connect to the DB: ",round((time.time() - start_time)/60,5),"minutes")

Time to connect to the DB:  0.00065 minutes


In [16]:
# get vessels dataframe
vessels_df = preprocessing_object.get_preprocessing(vessels_df)

# visualize vessels raw dataset
vessels_df.head()

Sum of null values in the dataframe:
ID        0
Vessel    1
dtype: int64


Unnamed: 0,ID,Vessel
0,4271,ADRIAN SCHULTE
1,1347,ALEXANDRA
2,2653,ALMAVIVA
3,4496,ALS Venus
4,6744,AMANDA D


## 1. Import the Excel Data into a SQL Database 

#### Import Detailed Season Report (DSR)

In [18]:
# interate through every row and insert into the database
for row in dsr_df.itertuples(index=False):
    cursor.execute("INSERT INTO db.DSR VALUES (?,?,?,?,?,?,?,?,?,?,\
                                               ?,?,?,?,?,?,?,?,?,?,\
                                               ?,?,?,?,?,?,?,?,?,?,\
                                               ?,?,?,?,?,?,?,?,?,?,\
                                               ?,?,?,?,?,?,?,?,?)",row)
    
# Save the changes
conn.commit()

#### Import Farms Dataset

In [17]:
# interate through every row and insert into the database
for row in farms_df.itertuples(index=False):
    cursor.execute("INSERT INTO db.Farms VALUES (?,?)",row)
    
# Save the changes
conn.commit()

IntegrityError: ('23000', "[23000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Violation of PRIMARY KEY constraint 'PK__Farms__3214EC27A4B5F8D1'. Cannot insert duplicate key in object 'db.Farms'. The duplicate key value is (45). (2627) (SQLExecDirectW); [23000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]The statement has been terminated. (3621)")

#### Import Producers Dataset

In [19]:
# interate through every row and insert into the database
for row in producers_df.itertuples(index=False):
    cursor.execute("INSERT INTO db.Producers VALUES (?,?)",row)
    
# Save the changes
conn.commit()

IntegrityError: ('23000', "[23000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Violation of PRIMARY KEY constraint 'PK__Producer__3214EC27AABCB035'. Cannot insert duplicate key in object 'db.Producers'. The duplicate key value is (201100). (2627) (SQLExecDirectW); [23000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]The statement has been terminated. (3621)")

#### Import Varieties Dataset

In [20]:
# interate through every row and insert into the database
for row in varieties_df.itertuples(index=False):
    cursor.execute("INSERT INTO db.Varieties VALUES (?,?,?,?)",row)
    
# Save the changes
conn.commit()

IntegrityError: ('23000', "[23000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Violation of PRIMARY KEY constraint 'PK__Varietie__3214EC27478C828C'. Cannot insert duplicate key in object 'db.Varieties'. The duplicate key value is (45). (2627) (SQLExecDirectW); [23000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]The statement has been terminated. (3621)")

#### Import Vessels Dataset

In [21]:
# interate through every row and insert into the database
for row in vessels_df.itertuples(index=False):
    cursor.execute("INSERT INTO db.Vessels VALUES (?,?)",row)
    
# Save the changes
conn.commit()

IntegrityError: ('23000', "[23000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Violation of PRIMARY KEY constraint 'PK__Vessels__3214EC276E009335'. Cannot insert duplicate key in object 'db.Vessels'. The duplicate key value is (4271). (2627) (SQLExecDirectW); [23000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]The statement has been terminated. (3621)")

In [70]:
class DatabaseData(object):
    """
    This is class blueprints that gets ethe data from the database and execute queries.
    """
    def __init__(self):
        self.conn = conn    
    
    def get_table_data(self, table_name):
        """
        This method retrieves records from the database
        
        arg:
        ---table_name: the table_name that we want to access
        
        return:
        --- table: table from fetched from the database
        """
        query = "SELECT * FROM {table}".format(table = table_name)

        table = pd.read_sql_query(query,self.conn)

        return table  
    
    def get_table_columns(self, table):
        """
        This method lists column names
        
        arg:
        --- table: this is a table
        
        returns:
        --- columns: lists of all columns
        """
        
        columns = table.columns.values.tolist()
        
        return columns

In [139]:
# initialiase table object
table_object = DatabaseData()

# get dsr data from the database
dsr_table = table_object.get_table_data("db.DSR")

# get the number of columns
dsr_columns = table_object.get_table_columns(dsr_table)
dsr_columns

['ID',
 'Year',
 'ProducerID',
 'FarmID',
 'Sales Week',
 'Pallet ID',
 'Sale ID',
 'Barcode',
 'Grade Code',
 'Mark Code',
 'VarietyID',
 'Pack Code',
 'Count Code',
 'Exchange Rate',
 'Currency',
 'QC',
 'Sequence Number',
 'Production ID',
 'Run Number',
 'Container No',
 'VesselID',
 'Port Of Discharge',
 'Target Market',
 'Inventory Code',
 'Trader ID',
 'Pallet Size',
 'From Barcode',
 'To Barcode',
 'Invoice ID',
 'Selected',
 'No Cartons',
 'Advance Price',
 'Producer Adjustment',
 'Advance Purchase',
 'Purchase Price',
 'Final Price',
 'Total',
 'Debit Note',
 'Credit Note',
 'Total Local Cost',
 'Total Export Cost',
 'Total Weight',
 'Pack Nett Weight',
 'Nett Weight',
 'Pack Gross Weight',
 'Gross Weight',
 'FOB',
 'DIP',
 'VAT',
 'Return To Grower']

In [140]:
dsr_table.head(30)

Unnamed: 0,ID,Year,ProducerID,FarmID,Sales Week,Pallet ID,Sale ID,Barcode,Grade Code,Mark Code,VarietyID,Pack Code,Count Code,Exchange Rate,Currency,QC,Sequence Number,Production ID,Run Number,Container No,VesselID,Port Of Discharge,Target Market,Inventory Code,Trader ID,Pallet Size,From Barcode,To Barcode,Invoice ID,Selected,No Cartons,Advance Price,Producer Adjustment,Advance Purchase,Purchase Price,Final Price,Total,Debit Note,Credit Note,Total Local Cost,Total Export Cost,Total Weight,Pack Nett Weight,Nett Weight,Pack Gross Weight,Gross Weight,FOB,DIP,VAT,Return To Grower
0,1,2017,201100,45,24,201233,3124,960091600127485952,1,AMA,74,A15C,72,13.424682,USD,False,1,291.0,0,CXRU1499008,6798,BDCGP,ME,WA,TRADER02,1.0,960091600127485952,960091600127485952,6100,1.0,88,0,0,0.0,0.0,10.5,924.0,0.0,0.0,672.090194,0.0,0,16.0,1280.0,16.95,1356.0,10.5,139.324307,-55.907373,10074.41044
1,2,2017,201100,45,25,207565,10375,960091600127457024,1,AMA,74,A15C,64,13.21,USD,False,1,291.0,0,MWCU6671193,2276,SAJED,ME,WA,TRADER02,1.0,960091600127457024,960091600127457024,11024,1.0,91,0,0,0.0,0.0,10.5,955.5,0.0,0.0,701.575766,0.0,0,16.0,1280.0,16.95,1356.0,10.5,137.090203,-47.779393,9875.083022
2,3,2017,201100,45,25,100285,10375,960091600127485952,1,AMA,74,A15C,64,13.21,USD,False,1,291.0,0,MWCU6671193,2276,SAJED,ME,WA,TRADER02,1.0,960091600127485952,960091600127485952,11024,1.0,93,0,0,0.0,0.0,10.5,976.5,0.0,0.0,701.575766,0.0,0,16.0,1280.0,16.95,1356.0,10.5,137.090203,52.220607,9875.083022
3,4,2017,201100,45,25,181685,7374,960091600127457024,1,AMA,74,A15C,64,13.583237,USD,False,1,291.0,0,PONU4875876,2276,SAJED,ME,WA,TRADER02,1.0,960091600127457024,960091600127457024,2961,1.0,89,0,0,0.0,0.0,10.5,934.5,0.0,0.0,754.994818,0.0,0,16.0,1280.0,16.95,1356.0,10.5,141.074018,-40.300725,10122.17772
4,5,2017,201100,45,26,66238,6697,960091600127469952,1,AMA,700,A15C,125,13.41151,USD,False,1,291.0,0,PONU4883819,1337,SAJED,ME,WA,TRADER02,1.0,960091600127469952,960091600127469952,15710,1.0,85,0,0,0.0,0.0,7.0,595.0,0.0,0.0,548.241212,0.0,0,16.0,1280.0,16.95,1356.0,7.0,92.381472,73.75377,6614.094181
5,6,2017,201100,45,26,129276,6697,960091600127469952,1,AMA,700,A15C,125,13.41151,USD,False,1,291.0,0,PONU4883819,1337,SAJED,ME,WA,TRADER02,1.0,960091600127469952,960091600127469952,15710,1.0,93,0,0,0.0,0.0,7.0,651.0,0.0,0.0,548.241212,0.0,0,16.0,1280.0,16.95,1356.0,7.0,92.381472,187.75377,6614.094181
6,7,2017,201100,45,26,189240,6697,960091600127469952,1,AMA,74,A15C,105,13.41151,USD,False,1,291.0,0,PONU4883819,1337,SAJED,ME,WA,TRADER02,1.0,960091600127469952,960091600127469952,15710,1.0,94,0,0,0.0,0.0,10.5,987.0,0.0,0.0,660.897897,0.0,0,16.0,1280.0,16.95,1356.0,10.5,139.321757,21.525706,10074.531997
7,8,2017,201100,45,26,176342,6697,960091600127469952,1,AMA,74,A15C,125,13.41151,USD,False,1,291.0,0,PONU4883819,1337,SAJED,ME,WA,TRADER02,1.0,960091600127469952,960091600127469952,15710,1.0,94,0,0,0.0,0.0,7.0,658.0,0.0,0.0,548.241212,0.0,0,16.0,1280.0,16.95,1356.0,7.0,92.381472,-16.24623,6614.094181
8,9,2017,201100,45,26,116224,6697,960091600127469952,1,AMA,74,A15C,125,13.41151,USD,False,1,291.0,0,PONU4883819,1337,SAJED,ME,WA,TRADER02,1.0,960091600127469952,960091600127469952,15710,1.0,86,0,0,0.0,0.0,7.0,602.0,0.0,0.0,548.241212,0.0,0,16.0,1280.0,16.95,1356.0,7.0,92.381472,129.75377,6614.094181
9,10,2017,201100,45,26,105394,3589,960091600127454976,1,AMA,74,A15C,125,1.0,ZAR,False,1,291.0,0,MWMU6450417,1337,MUPLU,ME,WA,TRADER02,1.0,960091600127454976,960091600127454976,15159,1.0,81,0,0,0.0,0.0,80.0,6480.0,0.0,0.0,665.2,0.0,0,16.0,1280.0,16.95,1356.0,80.0,78.57,70.128,5448.06


In [80]:
# initialiase table object
table_object = DatabaseData()

# get dsr data from the database
varieties_table = table_object.get_table_data("db.Varieties")

# get the number of columns
varieties_columns = table_object.get_table_columns(varieties_table)
varieties_columns

['ID', 'Variety Group', 'Variety Code', 'Commodity Code']

In [83]:
varieties_table.head(20)

Unnamed: 0,ID,Variety Group,Variety Code,Commodity Code
0,7,0,ORM,SC
1,18,CLE,CLE,SC
2,23,GRA,CAB,GR
3,37,0,LMA,SC
4,39,MAN,MAN,SC
5,43,0,MRR,SC
6,44,NAV,OR,OR
7,45,AGN,AGN,OR
8,51,CVL,CVL,SC
9,52,RNN,RNN,OR


## 2. Produce an overview of the exporter's business.

#### 2.1 How many producers supplies fruit to this exporter.
Assuming that each producer has a unique ID

In [135]:
table_name =  "db.DSR"
query = """SELECT COUNT(DISTINCT ProducerID) AS "PRODUCERS" FROM {table}""".format(table = table_name)
SQL_Query = pd.read_sql_query(query, conn)
df = pd.DataFrame(SQL_Query)   
df.style.hide_index()

PRODUCERS
56


#### 2.2 How many different varieties of fruit are being exported?
Assuming each fruit exported has a unique ID e.g. Banana 11, Apple 12 etc

In [136]:
table_name =  "db.DSR"
query = """SELECT COUNT (DISTINCT VarietyID) AS "NO OF DIFFERENT VARIETIES" FROM {table}""".format(table = table_name)
SQL_Query = pd.read_sql_query(query, conn)
df = pd.DataFrame(SQL_Query)   
df.style.hide_index()

NO OF DIFFERENT VARIETIES
94


#### 2.3 How many tons per variety are being supplied to the exporter?

#### 2.4 What is the average return are growers getting per standard carton?

In [137]:
table_name =  "db.DSR"
query = """SELECT  "No Cartons", AVG("Return To Grower") AS "AVERAGE" FROM {table} GROUP BY "No Cartons";""".format(table = table_name)
SQL_Query = pd.read_sql_query(query, conn)
df = pd.DataFrame(SQL_Query)   
df.style.hide_index()

No Cartons,AVERAGE
261,5726.77946
23,2054.313688
238,11054.383911
355,12780.327989
570,23549.477555
46,4569.357666
378,16679.575722
215,9097.118908
2207,44938.445197
69,10805.197287


#### 2.5 What are the most prominent Target Markets that fruit are exported to?

In [153]:
table_name =  "db.DSR"
query = """SELECT  "Target Market", COUNT("Target Market") AS "MOST PROMINENT"\
FROM {table} GROUP BY "Target Market" ORDER BY "MOST PROMINENT" DESC;""".format(table = table_name)
SQL_Query = pd.read_sql_query(query, conn)
df = pd.DataFrame(SQL_Query)   
df.style.hide_index()

Target Market,MOST PROMINENT
EU,123405
LO,120249
NI,84879
NL,34857
ME,29340
UK,25854
BE,9849
FE,7818
SA,6015
CA,3906


## 3. Provide an overview of the producer named Corellia's.

#### 3.1 How many tons is the producer producing by hectare of their farm?

#### 3.2 What are the most prominent fruit variety groups, based on tons produced ?

#### 3.3 What return did the producer receive for 2020 in comparison with 2019?

#### 3.4 What is the distributionof the quality grades of the producer's fruit?

#### 3.5 What volume of cartons by cont is parked for the producer?

#### 3.6 Show the same information for any of the other producers and across multiple years.

In [100]:
# table_name =  "db.DSR"
# query = """SELECT COUNT (DISTINCT VarietyID) FROM {table}""".format(table = table_name)
# cursor.execute(query)
# data = cursor.fetchall()
# print("No of different varieties of fruit: {}".format(data))


In [101]:
df

Unnamed: 0,sum
0,94
