# Data Visualization

## Setup

### Imports

In [1]:
import pandas as pd
import plotly.express as px
from os import path
import numpy as np

### Configuration and data loading

In [2]:
# Display option: never elide columns when a DataFrame is rendered.
pd.set_option("display.max_columns", None)

# The CSV sits two directories above the notebook, under resources/database.
DATA_PATH = path.join(
    path.pardir, path.pardir, "resources", "database", "Vegetable_market.csv"
)
data = pd.read_csv(filepath_or_buffer=DATA_PATH)

# Show the loaded frame as the cell output.
data

Unnamed: 0,Vegetable,Season,Month,Temp,Deasaster Happen in last 3month,Vegetable condition,Price per kg
0,potato,winter,jan,15,no,fresh,20
1,tomato,winter,jan,15,no,fresh,50
2,peas,winter,jan,15,no,fresh,70
3,pumkin,winter,jan,15,no,fresh,25
4,cucumber,winter,jan,15,no,fresh,20
...,...,...,...,...,...,...,...
116,brinjal,winter,jan,15,yes,fresh,33
117,ginger,winter,jan,15,no,fresh,88
118,potato,summer,apr,32,no,fresh,24
119,peas,summer,apr,33,no,fresh,33


## Data Manipulation

In [3]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
data.shape

(121, 7)

In [4]:
# Print the per-column dtype and non-null count, plus the frame's memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 121 entries, 0 to 120
Data columns (total 7 columns):
 #   Column                           Non-Null Count  Dtype 
---  ------                           --------------  ----- 
 0   Vegetable                        121 non-null    object
 1   Season                           121 non-null    object
 2   Month                            121 non-null    object
 3   Temp                             121 non-null    int64 
 4   Deasaster Happen in last 3month  121 non-null    object
 5   Vegetable condition              121 non-null    object
 6   Price per kg                     121 non-null    int64 
dtypes: int64(2), object(5)
memory usage: 6.7+ KB


In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
data.describe()

Unnamed: 0,Temp,Price per kg
count,121.0,121.0
mean,24.892562,55.330579
std,9.319157,48.769934
min,15.0,9.0
25%,15.0,22.0
50%,27.0,35.0
75%,32.0,70.0
max,43.0,250.0


In [6]:
# Replace missing values in `data` with NaN, mutating the frame in place.
# NOTE(review): data.info() reports 121 non-null entries for every column, and
# filling missing cells with NaN leaves them missing, so this step appears to be a
# no-op — confirm whether a real imputation value was intended.
data.fillna(value=np.nan, inplace=True)

In [7]:
# Number of rows recorded for each distinct value of the "Vegetable" column,
# most frequent first.
# NOTE(review): the output lists both "Raddish" and "radish" as separate entries;
# these look like spelling/case variants of the same vegetable — confirm and
# normalise the labels before relying on these counts.
data["Vegetable"].value_counts()

Vegetable
tomato             12
potato             11
peas               11
pumkin             10
ginger              9
cabage              8
califlower          8
pointed grourd      7
Raddish             6
brinjal             6
chilly              5
Bitter gourd        5
onion               5
okra                5
garlic              5
cucumber            4
radish              4
Name: count, dtype: int64

In [8]:
# Frequency table with one row per vegetable name: columns "Vegetable" and "count".
# The column names are set explicitly instead of relying on what
# value_counts().reset_index() happens to produce: before pandas 2.0 that call
# yields columns "index" and "Vegetable", which would break the plot that reads
# x="Vegetable", y="count".
count_vegetable = (
    data["Vegetable"]
    .value_counts()
    .rename_axis("Vegetable")
    .reset_index(name="count")
)

count_vegetable

Unnamed: 0,Vegetable,count
0,tomato,12
1,potato,11
2,peas,11
3,pumkin,10
4,ginger,9
5,cabage,8
6,califlower,8
7,pointed grourd,7
8,Raddish,6
9,brinjal,6


In [9]:
# Bar chart of the frequency table: one bar per vegetable, height = row count.
fig = px.bar(count_vegetable, x="Vegetable", y="count", title="Contagem de Vegetais")
fig.show()

In [10]:
# Pairwise scatter plots over vegetable, season and price; points are coloured
# by price per kg.
fig = px.scatter_matrix(
    data,
    title="Correlacões entre features",
    color="Price per kg",
    dimensions=["Vegetable", "Season", "Price per kg"],
)
fig.show()

In [11]:
# Frequency table with one row per season: columns "Season" and "count".
# The column names are set explicitly instead of relying on what
# value_counts().reset_index() happens to produce: before pandas 2.0 that call
# yields columns "index" and "Season", which would break the pie chart that reads
# names="Season", values="count".
count_season = (
    data["Season"]
    .value_counts()
    .rename_axis("Season")
    .reset_index(name="count")
)

count_season

Unnamed: 0,Season,count
0,winter,58
1,summer,37
2,monsoon,21
3,spring,3
4,autumn,2


In [12]:
# Pie chart of the season frequency table: one slice per season, sized by row count.
fig = px.pie(count_season, names="Season", values="count", title="Gráfico pizza das estacões")
fig.show()

In [13]:
# Average price per kg for each vegetable.
# When `y` is supplied, px.histogram aggregates with histfunc="sum" by default.
# Summing per-kg prices across rows mostly reflects how many rows each vegetable
# has (between 4 and 12 here), not how expensive it is, so the mean is used.
fig = px.histogram(
    data_frame=data,
    x="Vegetable",
    y="Price per kg",
    histfunc="avg",
    title="Vegetal x Preco",
)

fig.show()