In [2]:
# To support both python 2 and python 3
from __future__ import print_function, division, unicode_literals
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

# Usual imports
import numpy as np
import os
import cv2
import warnings
from os.path import join as path_join
from tqdm import tqdm
import json
tqdm.pandas()
import datetime

# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
import math

# `IPython.display` is the public location of `display` and `HTML`; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
# Widen the notebook container to 90% of the page so wide output fits.
display(HTML("<style>.container { width:90% !important; }</style>"))

import pandas as pd
# Keep wide frames on one line instead of wrapping, and show up to 100 columns.
pd.set_option('display.expand_frame_repr', False)
pd.set_option("display.max_columns", 100)

# while not os.getcwd().endswith("ml"):
#     os.chdir(os.path.abspath(os.path.join(os.pardir)))
print("Working dir:", os.getcwd())

# Project-local helpers from the `common` package.
from common.analysis.analysis_tools import calculate_CDF
from common.data_manipulation.image_data_tools.bounding_box_tools import get_bbox_dimensions
from common.data_manipulation.pandas_tools import save_dataframe, add_face_id, load_df, separate_df_based_on_column_range, add_image_id

from common.image_tools.image_loading import load_image
from common.image_tools.cropping import crop_box, crop_image_center_square
from common.image_tools.resizer import resize_image, ResizingType

# NOTE(review): the output recorded for this cell ends with
# "ModuleNotFoundError: No module named 'common.machine_learning_tools'", so this
# import did not resolve in that run and the imports after it were presumably
# never executed — confirm the module path.
from common.machine_learning_tools.qol_machine_learning_tools import train_val_test_split

from common.miscellaneous import print_indexed_list, print_fraction, verbose_print
from common.visualizations.image_visualizations import show_image, show_images, draw_pose, draw_bounding_box, draw_landmarks, draw_confidences
# NOTE(review): the star import hides where the plot_* helpers called later in this
# notebook come from; presumably they live in figure_plotting — confirm, and
# consider importing them by name.
from common.visualizations.figure_plotting import *

Working dir: D:\Igno\Programing\Python\common


ModuleNotFoundError: No module named 'common.machine_learning_tools'

In [None]:
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression, LinearRegression

from pydataset import data

# Prepare data

In [None]:
# Call pydataset's `data` with no arguments. The cells below treat the result as a
# DataFrame with a 'dataset_id' column (presumably one row per available dataset —
# confirm against pydataset).
d = data()
d

In [None]:
# Add a 'size' column holding the row count of every dataset listed in `d`.
# Only 'dataset_id' is needed, so iterate over that column directly instead of
# building a Series per row with `iterrows()`.
# Each dataset is loaded once just to be measured, so this cell can be slow.
sizes = [len(data(dataset_id)) for dataset_id in d['dataset_id']]

d['size'] = sizes
d

In [None]:
# The 20 largest datasets, in ascending order of size (largest last).
d.sort_values(by='size').iloc[-20:]

In [None]:
# Load the 'diamonds' dataset through pydataset and display it.
# NOTE(review): `df` is rebound to a frame built from `iris_data` a few cells below,
# so this frame is only shown here and is not what the later cells operate on.
df = data('diamonds')
df

In [None]:
# Load scikit-learn's iris dataset. Later cells read its 'data' and
# 'feature_names' entries by key.
iris_data = load_iris()
iris_data

In [None]:
# Ground-truth column and feature columns of the frame built in the next cell.
# They must describe `iris_data`: that cell creates the frame from
# iris_data['data'] and iris_data[gt_column], and the later cells index it by the
# iris column names. The diamonds settings below give nine column names for four
# data columns and a 'price' key that `iris_data` does not have.
gt_column = 'target'
feature_columns = iris_data['feature_names']

# Settings for the pydataset 'diamonds' frame (not usable with the iris-based cells below):
# gt_column = 'price'
# feature_columns = ['carat', 'cut', 'color', 'clarity', 'depth', 'table', 'x', 'y', 'z']

In [None]:
# Assemble the modelling frame: one column per entry of `feature_columns`, with
# the ground-truth column appended last. This rebinds `df`.
df = pd.DataFrame(
    data=iris_data['data'],
    columns=feature_columns,
).assign(**{gt_column: iris_data[gt_column]})
df

In [None]:
# Summary statistics for the columns of the current `df`.
df.describe()

In [None]:
# Hold-out split: draw a fixed-size random training sample and use every
# remaining row, shuffled, as the test set.
# The unfinished `train_coef =` assignment (a syntax error) and the argument-less
# `train_val_test_split()` call were removed. The latter is given no data here,
# and its import is reported as failing in the first cell's recorded output.
train_size = 100
split_seed = 0  # fixed so the split, and everything computed from it, is reproducible

train_df = df.sample(train_size, random_state=split_seed)
# Rows whose index was not drawn for training; sample(frac=1) shuffles them.
test_df = df.loc[~df.index.isin(train_df.index)].sample(frac=1, random_state=split_seed)

display(train_df)
display(test_df)

In [None]:
# Fit a logistic-regression classifier on the training split and score it on the
# test split. Columns are selected through `feature_columns` — the same list `df`
# was built with — instead of re-reading the names from `iris_data`.
log_reg = LogisticRegression()
log_reg.fit(train_df[feature_columns], train_df[gt_column])

test_features = test_df[feature_columns]
preds = log_reg.predict(test_features)
preds_probas = log_reg.predict_proba(test_features)

# Probability the model assigned to each sample's true class. np.choose uses the
# label as an index into the rows of preds_probas.T, so this assumes the labels
# are the integers 0..n_classes-1 in predict_proba's column order.
correct_probas = np.choose(test_df[gt_column].values, preds_probas.T)

# Element-wise hit/miss per test row; its mean is the accuracy.
correct = preds == test_df[gt_column]
acc = correct.mean()
acc

# Plots

In [None]:
# Directory (relative to the working directory) that the plotting cells below
# join their save file names onto; created here if it does not exist yet.
plot_path = "plot samples"
os.makedirs(plot_path, exist_ok=True)

## Histogram

In [None]:
# Target file for the first histogram.
plot_save_name = "hist.jpg"
plot_save_fullname = path_join(plot_path, plot_save_name)

sepal_length = df['sepal length (cm)']

# First variant: one bin per distinct value, written to `plot_save_fullname`.
# `add_quatinles` is spelled the way this notebook passes it everywhere —
# presumably the helper's actual keyword name, so it is kept as is.
plot_histogram(
    sepal_length,
    bins=len(pd.unique(sepal_length)),
    add_quatinles=True,
    save_fullname=plot_save_fullname,
)
# Second variant: ten bins, an explicit list for `add_quatinles`, and the
# cumulative option switched on.
plot_histogram(
    sepal_length,
    bins=10,
    add_quatinles=[0.1, 0.2, 0.3, 0.5, 0.6, 0.8, 0.99],
    add_cumulative=True,
    fig_size=(8, 5),
)

## Multiple histograms

In [None]:
# Target file for the saved two-series figure.
plot_save_name = "hist_mult.jpg"
plot_save_fullname = path_join(plot_path, plot_save_name)

# Four-series figure. The series list and the label list are given in the same
# order, so the third series has to be petal *length* to match 'p length'
# (petal width was previously passed twice).
plot_multiple_histograms(
    [
        df['sepal length (cm)'],
        df['sepal width (cm)'],
        df['petal length (cm)'],
        df['petal width (cm)'],
    ],
    ['s length', 's witdh', 'p length', 'p witdh'],
    title="lengths and widths",
)
# Two-series figure passed `only_save=True` together with the save path.
plot_multiple_histograms([df['sepal length (cm)'], df['sepal width (cm)']], ['length', 'witdh'], add_quatinles=True, save_fullname=plot_save_fullname, only_save=True)

## Scatter

In [None]:
# Sepal length against sepal width. The third argument grows linearly with petal
# length (presumably the marker size — confirm against plot_scatter's signature).
petal_scaled = 4 + 5 * df['petal length (cm)']
plot_scatter(
    df['sepal length (cm)'],
    df['sepal width (cm)'],
    petal_scaled,
    axis_labels=['length', 'witdh'],
    fig_size=(8, 8),
)

## XY curves

In [None]:
# Seeded generator so the sample curves are identical on every run.
rng = np.random.RandomState(0)

# One shared, ascending x axis and two independent random y series.
x = sorted(rng.rand(10))
y_1 = rng.rand(10)
y_2 = rng.rand(10)

plot_xy_curves(x, [y_1, y_2], ['1', '2'])
plot_xy_curves(x, [y_1, y_2], ['1', '2'], marker=False)

## Xs and Ys

In [None]:
# Seeded generator so the sample curves are identical on every run.
rng = np.random.RandomState(0)

# Two curves with their own x axes and different lengths (10 and 145 points).
x_1 = sorted(rng.rand(10))
x_2 = sorted(rng.rand(145))
y_1 = rng.rand(10)
y_2 = rng.rand(145)

plot_xs_and_ys([x_1, x_2], [y_1, y_2], ['1', '2'])

## CDF

In [None]:
# Evenly spaced thresholds on [0, 1] in steps of 0.005 (201 points, both ends
# included). linspace states the endpoint explicitly instead of nudging arange's
# stop value slightly past 1.0 to force the last point in.
threshs = np.linspace(0, 1, 201)
cdf = calculate_CDF(correct_probas, threshs)

plot_CDF(cdf, threshs, marker=None)
plot_CDF(cdf, threshs, log_scale='x', axis_limits=[0.3, 1.01])

## Multiple CDFs

In [None]:
# Seeded generator so the noisy curve is identical on every run.
rng = np.random.RandomState(0)

# Add uniform noise in [-0.05, 0.05) to every probability to get a second curve
# to compare against. The result is not clipped, so values can leave [0, 1].
noisy_correct_probas = correct_probas + (rng.rand(len(correct_probas)) - 0.5) / 10
noisy_cdf = calculate_CDF(noisy_correct_probas, threshs)

plot_CDFs([cdf, noisy_cdf], threshs, ['cdf', 'noisy'])

## bidir CDFs

In [None]:



# NOTE(review): called with no arguments, and nothing in this cell prepares inputs
# for it — this looks like an unfinished placeholder. Confirm the helper's required
# parameters before running.
plot_multiple_bidir_CED_curves()