In [1]:
import os

def get_repo_dir():
    cwd = os.getcwd()
    splited = cwd.split('/')
    ind = splited.index('fsCounter')
    repo_dir = '/'
    for s in splited[1:ind + 1]:
        repo_dir = os.path.join(repo_dir, s)

    return repo_dir

In [2]:
import sys
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import display



sys.path.append(get_repo_dir())

from vision.tools.jupyter_notebooks.notebook_analysis_help_funcs import *
from vision.tools.post_process_analysis import read_tracks_and_slices, get_block_count
from vision.visualization.draw_bb_from_csv import draw_tree_bb_from_tracks

In [3]:
def concat_to_meta(block_meta, df):
    df_col = list(block_meta.columns)
    df['block'] = df['block'].str.lower()
    new_data = []
    data = block_meta.copy()
    for id_, sample in data.iterrows():
        block = sample['block'].lower()
        row = sample['row'].lower()
        tree_id = int(sample['tree_id'])
        print(f'block == "{block}" and row == "{row}" and tree_id == {tree_id}')
        q_data = df.query(f'block == "{block}" and row == "{row}" and tree_id == {tree_id}')
        new_sample = sample.to_list()
        for i in range(1, 4):
            new_sample.append(q_data[str(i)].values[0])
        new_data.append(new_sample)

    df_col += ['cv1', 'cv2', 'cv3']
    new_df = pd.DataFrame(new_data, columns=df_col)

    return new_df

def add_ratios(df):
    df['F/cv1'] = df['F'] / df['cv1']
    df['F/cv2'] = df['F'] / df['cv2']
    df['F/cv3'] = df['F'] / df['cv3']

    return df

def get_block_ratio(block_df, row_tracks, y_threshold=800, depth=3):
    block_col = list(block_df.columns)
    new_data = []
    for id_, sample in block_df.iterrows():
        row = sample['row'].lower()
        tree_id = int(sample['tree_id'])
        tree_df = row_tracks[row][tree_id]

        d_tree_df = tree_df.query(f'depth <= {depth}')
        
        
        lower_tree_df = tree_df.query(f'y1 > {y_threshold} and depth <= {depth}')
        count = len(tree_df.track_id.unique())
        lower_count = len(lower_tree_df.track_id.unique())
        if count > 0:
            ratio = lower_count / count
        else:
            ratio = 0

        gdf = lower_tree_df.groupby('track_id')
        lower_tracks_depth = np.array(gdf.depth.mean())
        filtered_lower_tracks_depth = lower_tracks_depth[lower_tracks_depth < 3]
        mean = np.mean(filtered_lower_tracks_depth)
        std = np.std(filtered_lower_tracks_depth)          
        
        new_sample = sample.to_list()
 
        uniq, counts = np.unique(d_tree_df["track_id"], return_counts=True)
        for i in range(0, 5):
            new_sample.append(len(uniq[counts>i]))
            
        new_data.append(new_sample)

    #block_col += ['y_ratio', 'mean', 'std', 'lcv1', 'lcv2', 'lcv3', 'lcv4', 'lcv5']
    block_col += ['dcv1', 'dcv2', 'dcv3', 'dcv4', 'dcv5']
    new_df = pd.DataFrame(new_data, columns=block_col)

    return new_df    

In [4]:
def linear_model_selection(data, selection_cols=["cv1"], type_col="block", cross_val='row'):

    factors = {}
    for col in selection_cols:
        factor, res_mean, res_std, tree_mean, tree_std, all_preds = run_LROCV(data, cv_col=col, type_col=type_col, cross_val=cross_val, return_res=True)
        factors[col] = {'factor': factor, 'mean_error': res_mean, 'std_error': res_std}

    return factors
    

In [5]:
def block_analysis(block_path, metadata_path, block_):
    block_counts, row_tracks = get_block_count(block_path)
    block_counts_df = pd.DataFrame(block_counts, columns=['tree_id', 'block', 'row', '1', '2','3'])
    meta_data = pd.read_csv(metadata_path)
    block_meta = meta_data.query(f'block == "{block_}"')
    block_df = concat_to_meta(block_meta, block_counts_df)
    block_df['F/cv1'] = block_df['F'] / block_df['cv1'] 
    block_df = get_block_ratio(block_df, row_tracks)

    return block_df, row_tracks

In [6]:
def get_selection_error(factors_dict, block_df):
    results = {}
    for item_ in list(factors_dict.keys()):
        block_df[f'err_{item_}'] = (block_df['F'] - (block_df[item_] * factors_dict[item_]['factor'])) / block_df['F']
        results[item_] = {'err': np.mean(block_df[f'err_{item_}']), 'err_std': np.std(block_df[f'err_{item_}'])}

    return results, block_df

In [7]:
metadata_path = "/media/matans/My Book/FruitSpec/Apples_SA/data_meta.csv"

In [24]:
block_path = "/media/matans/My Book/FruitSpec/Apples_SA/block_13"
block_ = 'Block_13'
block_13_df, row_tracks_13 = block_analysis(block_path, metadata_path, block_)

block == "block_13" and row == "row_1111" and tree_id == 2
block == "block_13" and row == "row_1111" and tree_id == 1
block == "block_13" and row == "row_2222" and tree_id == 1
block == "block_13" and row == "row_2222" and tree_id == 2
block == "block_13" and row == "row_3333" and tree_id == 1
block == "block_13" and row == "row_3333" and tree_id == 2
block == "block_13" and row == "row_4444" and tree_id == 1
block == "block_13" and row == "row_4444" and tree_id == 2
block == "block_13" and row == "row_5555" and tree_id == 1
block == "block_13" and row == "row_5555" and tree_id == 2
block == "block_13" and row == "row_6666" and tree_id == 1
block == "block_13" and row == "row_6666" and tree_id == 2
block == "block_13" and row == "row_7777" and tree_id == 1
block == "block_13" and row == "row_7777" and tree_id == 2
block == "block_13" and row == "row_8888" and tree_id == 1
block == "block_13" and row == "row_8888" and tree_id == 2


In [31]:
block_13_df

Unnamed: 0,id,block,row,tree,side,tree_id,variety,Age,F,cv1,cv2,cv3,F/cv1,dcv1,dcv2,dcv3,dcv4,dcv5
0,1,Block_13,Row_1111,9,A,2,Royal Beauty,7,823,973,518,364,0.845838,847,455,308,205,149
1,2,Block_13,Row_1111,10,A,1,Royal Beauty,7,1007,739,425,290,1.362652,696,390,273,209,160
2,3,Block_13,Row_2222,9,B,1,Royal Beauty,7,823,965,488,298,0.85285,822,425,249,172,104
3,4,Block_13,Row_2222,10,B,2,Royal Beauty,7,1007,1047,519,343,0.961796,989,483,315,198,142
4,5,Block_13,Row_3333,11,B,1,Royal Beauty,7,455,586,229,128,0.776451,508,196,105,73,53
5,6,Block_13,Row_3333,12,B,2,Royal Beauty,7,637,780,413,285,0.816667,712,390,271,201,159
6,7,Block_13,Row_4444,12,A,1,Royal Beauty,7,637,937,500,357,0.679829,821,435,309,236,167
7,8,Block_13,Row_4444,11,A,2,Royal Beauty,7,455,511,247,171,0.890411,439,215,142,111,77
8,9,Block_13,Row_5555,14,A,1,Early_red_one,7,1008,1159,799,648,0.869715,1096,745,595,454,366
9,10,Block_13,Row_5555,13,A,2,Early_red_one,7,1440,1814,1199,960,0.793826,1729,1150,908,720,585


In [None]:
#row_to_drow = 'row_8888'
#tree_id = 2
#draw_tree_bb_from_tracks(row_tracks[row_to_drow][tree_id], os.path.join(block_path, row_to_drow, '1'), tree_id)

In [None]:
plt.bar(block_13_df['id'], block_13_df['F/cv1'])
plt.xlabel('Tree')
plt.ylabel('F/CV1')

In [None]:
plot_F_cv(block_13_df, 1, hue='variety', add_xy_line=False)

In [None]:
plot_F_cv(block_13_df, 1, add_xy_line=False)

In [None]:
factors_13_dict = linear_model_selection(block_13_df, selection_cols=['cv1','dcv1', 'cv3', 'dcv3'],type_col="block", cross_val='id')

In [None]:
res, block_13_df = get_selection_error(factors_13_dict, block_13_df)
res

In [33]:
m =block_13_df['cv1'].mean()
s = block_13_df['cv1'].std()
print(f'mean: {m}, std: {s}')

mean: 1053.875, std: 335.03290883135645


In [None]:
factors_13_dict


In [25]:
rb_df = block_13_df.query('variety == "Royal Beauty"')
factors_rb_dict = linear_model_selection(rb_df, selection_cols=['cv1','dcv1', 'cv3', 'dcv3'],type_col="block", cross_val='id')
res, block_rb_df = get_selection_error(factors_rb_dict, rb_df)
res

true: 455,    pred: 454.053726423814. (0.21 %) (8)
true: 637,    pred: 868.9540813922752. (36.41 %) (7)
true: 637,    pred: 699.9710960128089. (9.89 %) (6)
true: 455,    pred: 525.0320268105656. (15.39 %) (5)
true: 1007,    pred: 911.8164070520211. (9.45 %) (4)
true: 823,    pred: 864.4161256168607. (5.03 %) (3)
true: 1007,    pred: 618.9399970266821. (38.54 %) (2)
true: 823,    pred: 873.1058776389284. (6.09 %) (1)
0.15125960990883153 0.1354175976216873
(0.15125960990883153, 0.1354175976216873)
[0.88864533]
true: 455,    pred: 454.053726423814. (0.21 %) (8)
true: 637,    pred: 868.9540813922752. (36.41 %) (7)
true: 637,    pred: 699.9710960128089. (9.89 %) (6)
true: 455,    pred: 525.0320268105656. (15.39 %) (5)
true: 1007,    pred: 911.8164070520211. (9.45 %) (4)
true: 823,    pred: 864.4161256168607. (5.03 %) (3)
true: 1007,    pred: 618.9399970266821. (38.54 %) (2)
true: 823,    pred: 873.1058776389284. (6.09 %) (1)
0.15125960990883153 0.1354175976216873
true: 455,    pred: 437.299

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  block_df[f'err_{item_}'] = (block_df['F'] - (block_df[item_] * factors_dict[item_]['factor'])) / block_df['F']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  block_df[f'err_{item_}'] = (block_df['F'] - (block_df[item_] * factors_dict[item_]['factor'])) / block_df['F']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view

{'cv1': {'err': -0.035839716682566886, 'err_std': 0.17806922943390702},
 'dcv1': {'err': -0.02655285365932468, 'err_std': 0.1602135160938905},
 'cv3': {'err': -0.01470600406854997, 'err_std': 0.22816328824996907},
 'dcv3': {'err': 0.0045666605809217276, 'err_std': 0.22990293523548141}}

In [26]:
factors_rb_dict

{'cv1': {'factor': array([0.89733389]),
  'mean_error': 0.15125960990883153,
  'std_error': 0.1354175976216873},
 'dcv1': {'factor': array([1.00284245]),
  'mean_error': 0.12951353813892663,
  'std_error': 0.12987517762548312},
 'cv3': {'factor': array([2.62031524]),
  'mean_error': 0.21641771555714787,
  'std_error': 0.14424736609120073},
 'dcv3': {'factor': array([2.93413113]),
  'mean_error': 0.22626896841847988,
  'std_error': 0.1294836024949359}}

In [35]:
block_21_path = "/media/matans/My Book/FruitSpec/Apples_SA/block_21"
block_21 = 'Block_21'
block_21_df, row_tracks_21 = block_analysis(block_21_path, metadata_path, block_21)

block == "block_21" and row == "row_1111" and tree_id == 1
block == "block_21" and row == "row_1111" and tree_id == 2
block == "block_21" and row == "row_2222" and tree_id == 1
block == "block_21" and row == "row_2222" and tree_id == 2
block == "block_21" and row == "row_3333" and tree_id == 1
block == "block_21" and row == "row_3333" and tree_id == 2
block == "block_21" and row == "row_4444" and tree_id == 1
block == "block_21" and row == "row_4444" and tree_id == 2


In [None]:
block_21_df

In [36]:
m =block_21_df['cv1'].mean()
s = block_21_df['cv1'].std()
print(f'mean: {m}, std: {s}')

mean: 943.25, std: 187.47247417001935


In [None]:
plot_F_cv(block_21_df, 1, add_xy_line=False)

In [None]:
factors_21_dict = linear_model_selection(block_21_df, selection_cols=['cv1','dcv1', 'cv3', 'dcv3'],type_col="block", cross_val='id')

In [None]:
res, block_block_21_df13_df = get_selection_error(factors_21_dict, block_21_df)
res

In [None]:
#row_to_drow = 'row_1111'
#tree_id = 1
#draw_tree_bb_from_tracks(row_tracks[row_to_drow][tree_id], os.path.join(block_path, row_to_drow, '1'), tree_id)

In [None]:
plt.bar(block_21_df['id'], block_21_df['F/cv1'])
plt.xlabel('Tree')
plt.ylabel('F/CV1')

In [37]:
block_48_path = "/media/matans/My Book/FruitSpec/Apples_SA/block_48"
block_48 = 'Block_48'
block_48_df, row_tracks_48 = block_analysis(block_48_path, metadata_path, block_48)

block == "block_48" and row == "row_111" and tree_id == 1
block == "block_48" and row == "row_111" and tree_id == 2
block == "block_48" and row == "row_222" and tree_id == 1
block == "block_48" and row == "row_222" and tree_id == 2
block == "block_48" and row == "row_333" and tree_id == 1
block == "block_48" and row == "row_333" and tree_id == 2
block == "block_48" and row == "row_444" and tree_id == 1
block == "block_48" and row == "row_444" and tree_id == 2


In [38]:
m =block_48_df['cv1'].mean()
s = block_48_df['cv1'].std()
print(f'mean: {m}, std: {s}')

mean: 1431.375, std: 168.47461657048348


In [None]:
factors_48_dict = linear_model_selection(block_48_df, selection_cols=['cv1','dcv1', 'cv3', 'dcv3'],type_col="block", cross_val='id')

In [None]:
res, block_48_df_res = get_selection_error(factors_48_dict, block_48_df)
res

In [None]:
plt.bar(block_48_df['id'], block_48_df['F/cv1'])
plt.xlabel('Tree')
plt.ylabel('F/CV1')

In [39]:
block_3_path = "/media/matans/My Book/FruitSpec/Apples_SA/block_3"
block_3 = 'Block_3'
block_3_df, row_tracks_3 = block_analysis(block_3_path, metadata_path, block_3)

block == "block_3" and row == "row_111" and tree_id == 1
block == "block_3" and row == "row_111" and tree_id == 2
block == "block_3" and row == "row_222" and tree_id == 1
block == "block_3" and row == "row_222" and tree_id == 2
block == "block_3" and row == "row_333" and tree_id == 1
block == "block_3" and row == "row_333" and tree_id == 2
block == "block_3" and row == "row_444" and tree_id == 1
block == "block_3" and row == "row_444" and tree_id == 2


In [None]:
block_3_df

In [40]:
m =block_3_df['cv1'].mean()
s = block_3_df['cv1'].std()
print(f'mean: {m}, std: {s}')

mean: 744.375, std: 176.69823954171943


In [None]:
factors_3_dict = linear_model_selection(block_3_df, selection_cols=['cv1','dcv1', 'cv3', 'dcv3'],type_col="block", cross_val='id')

In [None]:
plt.bar(block_3_df['id'], block_3_df['F/cv1'])
plt.xlabel('Tree')
plt.ylabel('F/CV1')

In [None]:
res, block_3_df_res = get_selection_error(factors_3_dict, block_3_df)

In [None]:
res

In [None]:
factors_3_dict

In [41]:
block_83_path = "/media/matans/My Book/FruitSpec/Apples_SA/083GOLD0"
block_83 = '083GOLD0'
block_83_df, row_tracks_83 = block_analysis(block_83_path, metadata_path, block_83)

block == "083gold0" and row == "row_111" and tree_id == 1
block == "083gold0" and row == "row_111" and tree_id == 2
block == "083gold0" and row == "row_222" and tree_id == 1
block == "083gold0" and row == "row_222" and tree_id == 2
block == "083gold0" and row == "row_333" and tree_id == 1
block == "083gold0" and row == "row_333" and tree_id == 2
block == "083gold0" and row == "row_444" and tree_id == 1
block == "083gold0" and row == "row_444" and tree_id == 2


In [None]:
block_83_df

In [42]:
m =block_83_df['cv1'].mean()
s = block_83_df['cv1'].std()
print(f'mean: {m}, std: {s}')

mean: 630.625, std: 151.65745000776303


In [None]:
factors_83_dict = linear_model_selection(block_83_df, selection_cols=['cv1','dcv1', 'cv3', 'dcv3'],type_col="block", cross_val='id')
res, block_83_df_res = get_selection_error(factors_83_dict, block_83_df)

In [None]:
res

In [None]:
factors_83_dict

In [None]:
plt.bar(block_83_df['id'], block_83_df['F/cv1'])
plt.xlabel('Tree')
plt.ylabel('F/CV1')

In [43]:
block_20_path = "/media/matans/My Book/FruitSpec/Apples_SA/020GRANO"
block_20 = '020GRANO'
block_20_df, row_tracks_20 = block_analysis(block_20_path, metadata_path, block_20)

block == "020grano" and row == "row_1111" and tree_id == 1
block == "020grano" and row == "row_1111" and tree_id == 2
block == "020grano" and row == "row_2222" and tree_id == 1
block == "020grano" and row == "row_2222" and tree_id == 2
block == "020grano" and row == "row_3333" and tree_id == 1
block == "020grano" and row == "row_3333" and tree_id == 2


In [44]:
m =block_20_df['cv1'].mean()
s = block_20_df['cv1'].std()
print(f'mean: {m}, std: {s}')

mean: 1670.5, std: 289.6368415792439


In [None]:
block_20_df

In [None]:
factors_20_dict = linear_model_selection(block_20_df, selection_cols=['cv1','dcv1', 'cv3', 'dcv3'],type_col="block", cross_val='id')
res, block_20_df_res = get_selection_error(factors_20_dict, block_20_df)

In [None]:
res

In [None]:
factors_20_dict

In [None]:
plt.bar(block_20_df['id'], block_20_df['F/cv1'])
plt.xlabel('Tree')
plt.ylabel('F/CV1')

In [45]:
block_25_path = "/media/matans/My Book/FruitSpec/Apples_SA/025PINKO"
block_25 = '025PINKO'
block_25_df, row_tracks_25 = block_analysis(block_25_path, metadata_path, block_25)

block == "025pinko" and row == "row_111" and tree_id == 1
block == "025pinko" and row == "row_111" and tree_id == 2
block == "025pinko" and row == "row_222" and tree_id == 1
block == "025pinko" and row == "row_222" and tree_id == 2
block == "025pinko" and row == "row_333" and tree_id == 1
block == "025pinko" and row == "row_333" and tree_id == 2


In [46]:
block_25_df

Unnamed: 0,id,block,row,tree,side,tree_id,variety,Age,F,cv1,cv2,cv3,F/cv1,dcv1,dcv2,dcv3,dcv4,dcv5
0,49,025PINKO,Row_111,10,A,1,Royal Beauty,20,1271,2200,1407,1052,0.577727,1975,1194,883,666,518
1,50,025PINKO,Row_111,13,A,2,Royal Beauty,20,2016,2248,1483,1145,0.896797,1907,1263,984,784,656
2,51,025PINKO,Row_222,13,B,1,Pink Lady,20,2016,1776,1094,812,1.135135,1554,949,688,529,406
3,52,025PINKO,Row_222,10,B,2,Pink Lady,20,1271,1487,880,640,0.854741,1052,546,374,280,201
4,53,025PINKO,Row_333,5,A,1,Royal Beauty,20,1368,1686,1177,905,0.811388,1325,889,678,528,415
5,54,025PINKO,Row_333,6,A,2,Royal Beauty,20,1318,1817,1211,941,0.725371,1337,839,604,458,354


In [47]:
m =block_25_df['cv1'].mean()
s = block_25_df['cv1'].std()
print(f'mean: {m}, std: {s}')

mean: 1869.0, std: 297.98254982465


In [None]:
factors_25_dict = linear_model_selection(block_25_df, selection_cols=['cv1','dcv1', 'cv3', 'dcv3'],type_col="block", cross_val='id')
res, block_25_df_res = get_selection_error(factors_25_dict, block_25_df)

In [None]:
res

In [None]:
factors_25_dict

In [None]:
plt.bar(block_25_df['id'], block_25_df['F/cv1'])
plt.xlabel('Tree')
plt.ylabel('F/CV1')

In [None]:
#row_to_drow = 'row_111'
#tree_id = 1
#date = '221123'
#draw_tree_bb_from_tracks(row_tracks_25[row_to_drow][tree_id], os.path.join(block_25_path, date, row_to_drow, '1'), tree_id)

In [8]:
block_39_path = "/media/matans/My Book/FruitSpec/Apples_SA/039ROYA0"
block_39 = '039ROYA0'
block_39_df, row_tracks_39 = block_analysis(block_39_path, metadata_path, block_39)

block == "039roya0" and row == "row_111" and tree_id == 1
block == "039roya0" and row == "row_111" and tree_id == 2
block == "039roya0" and row == "row_222" and tree_id == 1
block == "039roya0" and row == "row_222" and tree_id == 2
block == "039roya0" and row == "row_333" and tree_id == 1
block == "039roya0" and row == "row_333" and tree_id == 2
block == "039roya0" and row == "row_444" and tree_id == 1
block == "039roya0" and row == "row_444" and tree_id == 2
block == "039roya0" and row == "row_555" and tree_id == 1
block == "039roya0" and row == "row_555" and tree_id == 2
block == "039roya0" and row == "row_666" and tree_id == 1
block == "039roya0" and row == "row_666" and tree_id == 2
block == "039roya0" and row == "row_777" and tree_id == 1
block == "039roya0" and row == "row_777" and tree_id == 2
block == "039roya0" and row == "row_888" and tree_id == 1
block == "039roya0" and row == "row_888" and tree_id == 2


In [None]:
block_39_df

In [13]:
gd_df = block_39_df.query('variety == "Golden Delicious"')

In [15]:
gd_df

Unnamed: 0,id,block,row,tree,side,tree_id,variety,Age,F,cv1,...,F/cv1,dcv1,dcv2,dcv3,dcv4,dcv5,err_cv1,err_dcv1,err_cv3,err_dcv3
0,57,039ROYA0,Row_111,1,A,1,Golden Delicious,46,1512,1424,...,1.061798,1275,705,466,346,255,-0.277744,-0.323243,-0.072475,-0.12511
1,58,039ROYA0,Row_111,2,A,2,Golden Delicious,46,2414,1632,...,1.479167,1466,945,726,597,482,0.08279,0.047033,-0.041518,-0.097893
2,59,039ROYA0,Row_222,2,B,1,Golden Delicious,46,2414,1715,...,1.40758,1447,923,611,409,277,0.036143,0.059384,0.008383,0.076015
3,60,039ROYA0,Row_222,1,B,2,Golden Delicious,46,1512,1721,...,0.878559,1479,896,651,498,386,-0.54424,-0.534961,-0.572964,-0.571774
4,61,039ROYA0,Row_333,3,A,1,Golden Delicious,46,1460,745,...,1.959732,687,362,257,190,142,0.307708,0.261611,0.405526,0.3574
5,62,039ROYA0,Row_333,4,A,2,Golden Delicious,46,1671,1142,...,1.463222,911,420,255,163,112,0.072796,0.144494,0.406654,0.442911
6,63,039ROYA0,Row_444,4,B,1,Golden Delicious,46,1671,1094,...,1.527422,1008,514,318,220,142,0.111768,0.053403,0.317929,0.305277
7,64,039ROYA0,Row_444,3,B,2,Golden Delicious,46,1460,848,...,1.721698,704,397,249,166,94,0.211996,0.24334,0.329365,0.377403


In [16]:
gd_factors_dict = linear_model_selection(gd_df, selection_cols=['cv1','dcv1', 'cv3', 'dcv3'],type_col="block", cross_val='id')
gd_res, f = get_selection_error(gd_factors_dict, gd_df)

true: 1460,    pred: 1097.0310443931196. (24.86 %) (64)
true: 1671,    pred: 1417.6017158093575. (15.16 %) (63)
true: 1671,    pred: 1484.9454084661804. (11.13 %) (62)
true: 1460,    pred: 960.4458133088948. (34.22 %) (61)
true: 1512,    pred: 2458.3264670059853. (62.59 %) (60)
true: 2414,    pred: 2214.5537001987195. (8.26 %) (59)
true: 2414,    pred: 2085.299370039443. (13.62 %) (58)
true: 1512,    pred: 1931.9490501205717. (27.77 %) (57)
0.24702044763762238 0.16596377199784115
(0.24702044763762238, 0.16596377199784115)
[1.31508905]
true: 1460,    pred: 1097.0310443931196. (24.86 %) (64)
true: 1671,    pred: 1417.6017158093575. (15.16 %) (63)
true: 1671,    pred: 1484.9454084661804. (11.13 %) (62)
true: 1460,    pred: 960.4458133088948. (34.22 %) (61)
true: 1512,    pred: 2458.3264670059853. (62.59 %) (60)
true: 2414,    pred: 2214.5537001987195. (8.26 %) (59)
true: 2414,    pred: 2085.299370039443. (13.62 %) (58)
true: 1512,    pred: 1931.9490501205717. (27.77 %) (57)
0.247020447637

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  block_df[f'err_{item_}'] = (block_df['F'] - (block_df[item_] * factors_dict[item_]['factor'])) / block_df['F']


In [51]:
def how_many_sections(row_tracks):
    for key in list(row_tracks.keys()):
        print(f'number of sections in row {key} is {len(list(row_tracks[key].keys()))}')

In [17]:
block_36_path = "/media/matans/My Book/FruitSpec/Apples_SA/036FUJIO"
block_36 = '036FUJIO'
block_36_df, row_tracks_36 = block_analysis(block_36_path, metadata_path, block_36)

block == "036fujio" and row == "row_111" and tree_id == 1
block == "036fujio" and row == "row_111" and tree_id == 2
block == "036fujio" and row == "row_222" and tree_id == 1
block == "036fujio" and row == "row_222" and tree_id == 2
block == "036fujio" and row == "row_333" and tree_id == 1
block == "036fujio" and row == "row_333" and tree_id == 2
block == "036fujio" and row == "row_444" and tree_id == 1
block == "036fujio" and row == "row_444" and tree_id == 2
block == "036fujio" and row == "row_555" and tree_id == 1
block == "036fujio" and row == "row_555" and tree_id == 2
block == "036fujio" and row == "row_666" and tree_id == 1
block == "036fujio" and row == "row_666" and tree_id == 2
block == "036fujio" and row == "row_777" and tree_id == 1
block == "036fujio" and row == "row_777" and tree_id == 2
block == "036fujio" and row == "row_888" and tree_id == 1
block == "036fujio" and row == "row_888" and tree_id == 2


In [52]:
how_many_sections(row_tracks_36)

number of sections in row row_111 is 2
number of sections in row row_222 is 2
number of sections in row row_333 is 2
number of sections in row row_444 is 2
number of sections in row row_555 is 2
number of sections in row row_666 is 2
number of sections in row row_777 is 2
number of sections in row row_888 is 2


In [None]:
block_36_df

In [None]:
row_to_drow = 'row_222'
tree_id = 1
date = '241123'
draw_tree_bb_from_tracks(row_tracks_36[row_to_drow][tree_id], os.path.join(block_36_path, date, row_to_drow, '1'), tree_id)

In [20]:
f_36_df = block_36_df.query('variety == "Fuji"')

In [21]:
f_36_df = block_36_df.query('variety == "Fuji"')
fuji_factors_dict = linear_model_selection(f_36_df, selection_cols=['cv1','dcv1', 'cv3', 'dcv3'],type_col="block", cross_val='id')
res, fuji_block_36_df_res = get_selection_error(fuji_factors_dict, f_36_df)

true: 962,    pred: 841.4976788473219. (12.53 %) (88)
true: 812,    pred: 601.6729643195838. (25.90 %) (87)
true: 812,    pred: 951.5979031528129. (17.19 %) (86)
true: 962,    pred: 1078.6986088973244. (12.13 %) (85)
true: 1262,    pred: 887.1060025519054. (29.71 %) (84)
true: 990,    pred: 1031.1946145353895. (4.16 %) (83)
true: 990,    pred: 1353.6229352082037. (36.73 %) (82)
true: 1262,    pred: 1155.356322552611. (8.45 %) (81)
0.18349829599437675 0.1058936744393489
(0.18349829599437675, 0.1058936744393489)
[1.82526333]
true: 962,    pred: 841.4976788473219. (12.53 %) (88)
true: 812,    pred: 601.6729643195838. (25.90 %) (87)
true: 812,    pred: 951.5979031528129. (17.19 %) (86)
true: 962,    pred: 1078.6986088973244. (12.13 %) (85)
true: 1262,    pred: 887.1060025519054. (29.71 %) (84)
true: 990,    pred: 1031.1946145353895. (4.16 %) (83)
true: 990,    pred: 1353.6229352082037. (36.73 %) (82)
true: 1262,    pred: 1155.356322552611. (8.45 %) (81)
0.18349829599437675 0.10589367443934

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  block_df[f'err_{item_}'] = (block_df['F'] - (block_df[item_] * factors_dict[item_]['factor'])) / block_df['F']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  block_df[f'err_{item_}'] = (block_df['F'] - (block_df[item_] * factors_dict[item_]['factor'])) / block_df['F']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view

In [22]:
res

{'cv1': {'err': 0.02913828909100187, 'err_std': 0.18047224951270183},
 'dcv1': {'err': 0.02926530149327095, 'err_std': 0.18107355014277565},
 'cv3': {'err': 0.05497862639192526, 'err_std': 0.2391133235306315},
 'dcv3': {'err': 0.0634698888894831, 'err_std': 0.25628202625323704}}

In [23]:
fuji_factors_dict

{'cv1': {'factor': array([1.79682165]),
  'mean_error': 0.18349829599437675,
  'std_error': 0.1058936744393489},
 'dcv1': {'factor': array([1.93696799]),
  'mean_error': 0.18957531207045905,
  'std_error': 0.09519785032208494},
 'cv3': {'factor': array([3.8288042]),
  'mean_error': 0.268568187887578,
  'std_error': 0.09078185858250647},
 'dcv3': {'factor': array([4.27885095]),
  'mean_error': 0.2875401553709739,
  'std_error': 0.10128249838334163}}

In [None]:
p_36_df = block_36_df.query('variety == "Pink Lady"')
pink_factors_dict = linear_model_selection(p_36_df, selection_cols=['cv1','dcv1', 'cv3', 'dcv3'],type_col="block", cross_val='id')
p_res, pink_block_36_df_res = get_selection_error(pink_factors_dict, p_36_df)

In [None]:
p_res

In [None]:
pink_factors_dict

In [48]:
f_36_df.cv1.std()

112.04264175495226

In [49]:
f_36_df.cv1.mean()

538.875

In [50]:
p_36_df.cv1.std()

NameError: name 'p_36_df' is not defined