In [None]:
import calendar
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pkg_resources
from scipy.stats import linregress
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from tabulate import tabulate
import types

### Package requirements for reproducibility

In [None]:
def get_imports():
    """Yield the distribution name behind every import found in ``globals()``.

    Three kinds of global values are treated as imports:

    * modules (``import numpy as np``) -> top-level package of ``__name__``;
    * classes (``from sklearn.decomposition import PCA``) -> top-level
      package of ``__module__``;
    * other callables (``from scipy.stats import linregress``) -> top-level
      package of ``__module__``.

    Every other global (data frames, lists, numbers, ...) is skipped, so the
    names of ordinary variables are never reported as packages.

    Yields:
        str: a package name, translated through ``poorly_named_packages``
        when the import name differs from the distribution name. Names may
        repeat; callers are expected to de-duplicate.
    """
    # Import names that differ from the name the package is distributed under.
    poorly_named_packages = {
        "PIL": "Pillow",
        "sklearn": "scikit-learn"
    }

    # Iterate over a snapshot: globals created while this generator is
    # suspended must not trigger "dictionary changed size during iteration".
    for val in list(globals().values()):
        if isinstance(val, types.ModuleType):
            qualified = val.__name__
        elif isinstance(val, type) or callable(val):
            # Functions imported with ``from pkg import func`` land here.
            qualified = getattr(val, "__module__", None)
        else:
            continue

        # Some C-level callables expose ``__module__ = None``.
        if not isinstance(qualified, str):
            continue

        name = qualified.split(".")[0]
        yield poorly_named_packages.get(name, name)
# De-duplicate the package names detected in this notebook's namespace.
imports = list(set(get_imports()))

# Keep (name, version) for every installed distribution that the notebook
# imports; pip itself is never listed.
requirements = [
    (dist.project_name, dist.version)
    for dist in pkg_resources.working_set
    if dist.project_name != "pip" and dist.project_name in imports
]

# Emit the pins in requirements.txt syntax, one per line.
for project, version in requirements:
    print(f"{project}=={version}")

#### Define the initial dataset you'll be working on

In [None]:
# Workbook that holds every sheet used in this analysis.
DATA_FILE = 'Data.xlsx'
# Protein-fraction columns that get rescaled by total protein below.
PROTEIN_FRACTIONS = ['Prot sol acq', 'Prot nacl', 'Prot etoh', 'Prot ac ac']

# Open the workbook once and parse each sheet from the same handle instead of
# re-opening the file for every sheet.
with pd.ExcelFile(DATA_FILE) as workbook:
    SatelliteJuly = pd.read_excel(workbook, sheet_name='July_sat')
    SatelliteJune = pd.read_excel(workbook, sheet_name='June_sat')
    SatelliteMay = pd.read_excel(workbook, sheet_name='May_sat')
    SatelliteApril = pd.read_excel(workbook, sheet_name='April_satellite')
    Seed = pd.read_excel(workbook, sheet_name='Chem_comp_wheat', usecols=[1, 2, 3, 4, 5, 6, 7, 8])
    Dough = pd.read_excel(workbook, sheet_name='Dough', usecols=['W', 'P/L'])
    Bread = pd.read_excel(workbook, sheet_name='Bread', usecols=[2, 3, 4, 5, 6, 9])

# Seed2 must exist before columns can be assigned to it; start from a copy so
# the raw Seed table is left untouched for the cells that follow.
Seed2 = Seed.copy()
# Fourth Seed column (the recorded output lists it as 'Prot tot').
# NOTE(review): the fractions look like percentages of total protein - confirm.
total_protein = Seed.iloc[:, 3]
Seed2[PROTEIN_FRACTIONS] = Seed[PROTEIN_FRACTIONS].mul(total_protein, axis=0) / 100

In [None]:
# Lay the monthly satellite sheets side by side, April through July.
SatelliteData = pd.concat(
    objs=[SatelliteApril, SatelliteMay, SatelliteJune, SatelliteJuly],
    axis="columns",
)

### Let's build a first regression between reflectance data and the features we are examining

In [None]:
names = ['Seed', 'Dough', 'Bread']
db = [Seed, Dough, Bread]
# Tag every feature column with the processing stage it was measured at.
Fc = [
    (stage, feature)
    for stage, frame in zip(names, db)
    for feature in frame.columns
]
# One wide table with a (Stage, Feature) two-level column index.
Features = pd.concat(db, axis=1)
Features.columns = pd.MultiIndex.from_tuples(Fc, names=('Stage', 'Feature'))

In [None]:
bl1 = []  # one list of R² values (one per feature) for each satellite column
bli = []  # (month abbreviation, index name) label for each satellite column
for ib in SatelliteData:
    # The last character of the column name is read as the month number and
    # the first four characters as the index name.
    # NOTE(review): this only works for single-digit months - confirm naming.
    bli.append((calendar.month_abbr[int(ib[-1])], ib[:4]))
    # Squared correlation coefficient of a simple linear fit against each feature.
    bl1.append([linregress(SatelliteData[ib], Features[c]).rvalue ** 2 for c in Features])

# Rows: (Month, Index) of the satellite column; columns: (Stage, Feature).
Regression_df = pd.DataFrame(
    bl1,
    index=pd.MultiIndex.from_tuples(bli, names=('Month', 'Index')),
    columns=Features.columns,  # was `Features.column`, which raises AttributeError
)

### Query the regression table for the highest R² values

In [None]:
# Group the R² table three ways. A scalar `level` (rather than a one-element
# list) keeps the group keys scalar when iterating, so headers print as `Apr`
# and not as a 1-tuple on pandas releases that return tuples for list groupers.
Regression_month = Regression_df.groupby(level=0)      # index level 'Month'
Regression_index = Regression_df.groupby(level=1)      # index level 'Index'
# After transposing, the rows are the feature columns; level 0 is 'Stage'.
Regression_feature = Regression_df.T.groupby(level=0)

In [None]:
# For every grouping, report how many R² values exceed 0.1 and the largest
# such value in each column of the group.
Regressions = [Regression_month, Regression_index, Regression_feature]
for grouping in Regressions:
    for label, group_frame in grouping:
        print(f'Feature: {label}')
        print('-' * 16)
        # Values at or below the cut-off become NaN and are ignored below.
        above = group_frame[group_frame > 0.1]
        print(above.count(), end='\n\n')
        print(above.max(), end='\n\n')

In [81]:
# Same report as the 0.1 pass, with the stricter 0.3 cut-off on R².
Regressions = [Regression_month, Regression_index, Regression_feature]
for grouping in Regressions:
    for label, group_frame in grouping:
        print(f'Feature: {label}')
        print('-' * 16)
        # Mask once; count() and max() both skip the resulting NaNs.
        strong = group_frame[group_frame > 0.3]
        print(strong.count(), end='\n\n')
        print(strong.max(), end='\n\n')

Feature: Apr
----------------
Stage  Feature     
Seed   p hl            0
       N               0
       C               0
       Prot tot        0
       Prot sol acq    0
       Prot nacl       0
       Prot etoh       0
       Prot ac ac      0
Dough  W               3
       P/L             0
Bread  um cr           0
       um mol          0
       hard            0
       Spr             0
       Coh             0
       d mol           0
dtype: int64

Stage  Feature     
Seed   p hl                NaN
       N                   NaN
       C                   NaN
       Prot tot            NaN
       Prot sol acq        NaN
       Prot nacl           NaN
       Prot etoh           NaN
       Prot ac ac          NaN
Dough  W               0.30185
       P/L                 NaN
Bread  um cr               NaN
       um mol              NaN
       hard                NaN
       Spr                 NaN
       Coh                 NaN
       d mol               NaN
dtype: float64

Feat