### Socialcops Challenge

<b>Working with dataset of each APMC_Commodity inside apmc_commodity folder.</b>

This jupyter notebook contains all the visualization stuff plotted using data inside apmc_commodity folder.

#### 1. Import required libraries

In [4]:
from __future__ import absolute_import, division, print_function

# Data handeling
import pandas as pd

# Data Visualization
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.layouts import row, column, gridplot
from bokeh.models.widgets import Tabs, Panel

import os
import random

In [5]:
# pandas config
pd.set_option('display.float_format', lambda x: '%.5f' % x)
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)

In [6]:
random.seed(9001)

In [7]:
output_notebook()

#### 2. Load the dataset

In [8]:
# Get current working directory
def get_cwd():
    return os.getcwd()

data_folder = 'cleaned_data'
# Read the file
def read_file(filename):
    dataset_path = os.path.join(get_cwd(), data_folder, filename)
    return pd.read_csv(dataset_path, parse_dates = ['date'])

In [9]:
# list of all the filenames
def files_list():
    files = []
    for file in os.listdir(os.path.join(get_cwd(), data_folder)):
        files.append(file)
    return files

In [10]:
file_names = files_list()

In [11]:
random_file_1 = file_names[random.randint(0, len(file_names))]
random_file_1

'Akole_paddy-unhusked.csv'

In [12]:
read_file(filename = random_file_1)

Unnamed: 0.1,Unnamed: 0,Commodity,Year,Month,arrivals_in_qtl,min_price,max_price,modal_price,date,district_name,diff_price
0,0,paddy-unhusked,2014,September,119,1450,1900,1681,2014-09-01,Ahmadnagar,450
1,1,paddy-unhusked,2014,October,103,1425,1900,1669,2014-10-01,Ahmadnagar,475
2,2,paddy-unhusked,2014,November,204,1450,1800,1625,2014-11-01,Ahmadnagar,350
3,3,paddy-unhusked,2014,December,346,1400,1800,1625,2014-12-01,Ahmadnagar,400
4,4,paddy-unhusked,2015,January,403,1395,1860,1600,2015-01-01,Ahmadnagar,465
5,6,paddy-unhusked,2015,March,129,1400,1850,1613,2015-03-01,Ahmadnagar,450
6,7,paddy-unhusked,2015,April,113,1450,1925,1625,2015-04-01,Ahmadnagar,475
7,8,paddy-unhusked,2015,May,54,1425,1950,1681,2015-05-01,Ahmadnagar,525
8,9,paddy-unhusked,2015,June,62,1467,2067,1692,2015-06-01,Ahmadnagar,600
9,10,paddy-unhusked,2015,July,87,1440,1880,1595,2015-07-01,Ahmadnagar,440


#### 3. Function that can plot the 'diff_price' attribute 

In [13]:
def diff_price_plot(filename: str):
    names = filename.split('_')
    title = 'Difference in max and min price of ' + names[0] + ' (APMC) and ' + names[1][:-4] + ' (Commodity)'
    min_price_title = 'Min. price variation in ' + names[0] + ' (APMC) and ' + names[1][:-4] + ' (Commodity)'
    max_price_title = 'Max price variation in ' + names[0] + ' (APMC) and ' + names[1][:-4] + ' (Commodity)'
    apmc_comm = read_file(filename)
    df = apmc_comm.loc[:, ['Year', 'Month', 'min_price', 'max_price', 'date', 'diff_price']]
    df_cds = ColumnDataSource(df)
    tooltips = [
        ('diff_price', '@diff_price'),
        ('min_price', '@min_price'),
        ('max_price', '@max_price'),
        ('Year', '@Year'),
        ('Month', '@Month')
    ]
    min_price_tooltips = [
        ('min_price', '@min_price'),
        ('Year', '@Year'),
        ('Month', '@Month')
    ]
    max_price_tooltips = [
        ('max_price', '@max_price'),
        ('Year', '@Year'),
        ('Month', '@Month')
    ]
    fig = figure(x_axis_type = 'datetime',
                plot_height = 300, plot_width = 600,
                title = title,
                x_axis_label = 'Date', y_axis_label = 'Difference in max and min price',
                toolbar_location = None)
    fig.line('date', 'diff_price', color = 'blue', source = df_cds)
    fig.circle('date', 'diff_price', fill_color = 'white', size = 8, source = df_cds)
    fig.add_tools(HoverTool(tooltips = tooltips))
    
    min_price_fig = figure(x_axis_type = 'datetime',
                          plot_height = 300, plot_width = 600,
                          title = min_price_title,
                          x_axis_label = 'Date', y_axis_label = 'Min. price',
                          toolbar_location = None)
    min_price_fig.line('date', 'min_price', color = 'red', source = df_cds)
    min_price_fig.circle('date', 'min_price', fill_color = 'white', size = 8, source = df_cds)
    min_price_fig.add_tools(HoverTool(tooltips = min_price_tooltips))
    
    max_price_fig = figure(x_axis_type = 'datetime',
                          plot_height = 300, plot_width = 600,
                          title = max_price_title,
                          x_axis_label = 'Date', y_axis_label = 'Max. price',
                          toolbar_location = None)
    max_price_fig.line('date', 'max_price', color = 'orange', source = df_cds)
    max_price_fig.circle('date', 'max_price', fill_color = 'white', size = 8, source = df_cds)
    max_price_fig.add_tools(HoverTool(tooltips = max_price_tooltips))
    
    first_panel = Panel(child = fig, title = 'Difference in Price')
    second_panel = Panel(child = min_price_fig, title = 'Min Price')
    third_panel = Panel(child = max_price_fig, title = 'Max. Price')
    
    tabs = Tabs(tabs = [first_panel, second_panel, third_panel])
    show(tabs)

In [15]:
diff_price_plot(random_file_1)

#### 4. Function that can plot the 'modal_price' attribute

In [16]:
def modal_price_plot(filename: str):
    apmc_comm = read_file(filename)
    df = apmc_comm.loc[:, ['Year', 'Month', 'modal_price', 'date']]
    df_cds = ColumnDataSource(df)
    tooltips = [
        ('modal_price', '@modal_price'),
        ('Year', '@Year'),
        ('Month', '@Month')
    ]
    fig = figure(x_axis_type = 'datetime',
                plot_height = 300, plot_width = 600,
                title = 'Modal price varies with time',
                x_axis_label = 'Date', y_axis_label = 'Modal price',
                toolbar_location = None)
    fig.line('date', 'modal_price', color = 'blue', source = df_cds)
    fig.circle('date', 'modal_price', fill_color = 'white', size = 8, source = df_cds)
    fig.add_tools(HoverTool(tooltips = tooltips))
    show(fig)

In [17]:
modal_price_plot(random_file_1)

#### 5. Function that can plot the 'arrival_in_qtl' attribute

In [18]:
def arrivals_plot(filename: str):
    apmc_comm = read_file(filename)
    df = apmc_comm.loc[:, ['Year', 'Month', 'arrivals_in_qtl', 'date']]
    df_cds = ColumnDataSource(df)
    tooltips = [
        ('arrival', '@arrivals_in_qtl'),
        ('Year', '@Year'),
        ('Month', '@Month')
    ]
    fig = figure(x_axis_type = 'datetime',
                plot_height = 300, plot_width = 600,
                title = 'Arrival quantity varies with time',
                x_axis_label = 'Date', y_axis_label = 'Arrival quantity',
                toolbar_location = None)
    fig.line('date', 'arrivals_in_qtl', color = 'blue', source = df_cds)
    fig.circle('date', 'arrivals_in_qtl', fill_color = 'white', size = 8, source = df_cds)
    fig.add_tools(HoverTool(tooltips = tooltips))
    show(fig)

In [19]:
arrivals_plot(random_file_1)

#### 6. Highest price fluctuation across different commodities in each relevant year

In [20]:
def yearly_max_values(filename: str):
    maxprice = []
    apmc_comm = read_file(filename)
    uniq_year = apmc_comm['Year'].unique().tolist()
    for year in uniq_year:
        year_comm = apmc_comm[apmc_comm['Year'] == year]
        max_val = year_comm['max_price'].max()
        maxprice.append((year, max_val))
    year = []
    max_value = []
    for l in maxprice:
        year.append(l[0])
        max_value.append(l[1])
        
    df = pd.DataFrame({'year': year, 'max_value': max_value})
    return df


In [21]:
yearly_max_values(random_file_1)

Unnamed: 0,year,max_value
0,2014,1900
1,2015,2067
2,2016,2200


In [22]:
def yearly_max_value_plot(filename: str):
    df = yearly_max_values(filename)
    df['year'] = pd.to_datetime(df.year, format='%Y', errors='ignore')
    df_cds = ColumnDataSource(df)
    tooltips = [
        ('Max price', '@max_value')
    ]
    fig = figure(x_axis_type = 'datetime',
                plot_height = 300, plot_width = 600,
                title = 'Yearly Max. Value',
                x_axis_label = 'year', y_axis_label = 'max_value',
                toolbar_location = None)
    fig.line('year', 'max_value', color = 'blue', source = df_cds)
    fig.circle('year', 'max_value', fill_color = 'white', size = 8, source = df_cds)
    
    fig.add_tools(HoverTool(tooltips = tooltips))
    show(fig)

In [23]:
yearly_max_value_plot(random_file_1)