## Scatter Plots
- Scatter plots allow the comparison of two variables for a set of data.

## Generating random Scatter plot using graph_objs

In [1]:
import numpy as np
import pandas as pd

import plotly.offline as pyo
import plotly.graph_objs as go
# Put plotly's offline module into notebook mode before any pyo.iplot() call below.
pyo.init_notebook_mode(connected = True)

In [2]:
# Fix the seed of NumPy's global random generator so the draws below are repeatable
SEED = 42
np.random.seed(SEED)

In [2]:
# Draw the x and y samples: 101 integers each, taken from 1..99
# (randint's upper bound of 100 is exclusive)
n_points = 101
random_x = np.random.randint(low=1, high=100, size=n_points)
random_y = np.random.randint(low=1, high=100, size=n_points)

In [3]:
# Display the generated x values
random_x

array([92, 83, 24, 84, 32, 18, 53, 18, 36, 28, 66, 61, 81, 65, 14, 30, 10,
       66, 86, 17, 84, 61, 77, 54,  1, 15, 13, 82, 63, 57, 96, 63, 95, 32,
       14, 10, 92, 22, 44, 60, 38, 42, 19, 45, 78, 30, 21, 75, 13, 44,  7,
       52, 17, 67, 50, 33, 61, 73, 52, 11, 52, 71,  8,  7, 78, 20, 98, 90,
       88, 45, 27, 83, 39, 73, 70, 20,  1, 56, 24, 93, 56, 14, 37, 89, 49,
        5, 71, 32, 76,  1, 32, 69, 97, 83, 32, 75, 40, 42, 54, 15,  3])

In [4]:
# Display the generated y values
random_y

array([13, 24, 68, 30, 70, 64, 35, 84, 41, 73, 13, 75, 72, 53, 76, 20, 73,
       52, 99, 81, 76, 91, 19, 67, 83,  8, 35, 28, 28, 96, 75, 13, 54, 27,
       65, 30, 21, 15, 44, 67, 30, 45, 73, 25, 36, 26, 36, 82, 34, 94, 55,
        9,  1, 88,  9, 23, 50, 19, 91, 59, 28, 18, 70,  6, 41, 51, 71, 23,
       60, 79,  9,  6, 99, 61, 72, 57, 21, 55, 46, 94, 58, 18, 33, 22, 67,
        8, 20, 34, 11, 24, 76, 95,  8, 32, 25, 88, 92, 17, 69, 17, 76])

#### Generate the plot data (the list of traces) first

In [5]:
# Marker appearance: size-12 pentagons coloured rgb(51,204,153) with a width-2 outline
marker_style = {
    'size': 12,
    'color': 'rgb(51,204,153)',
    'symbol': 'pentagon',
    'line': dict(width=2),
}

# One markers-only trace of random_y against random_x, wrapped in a list
scatter_trace = go.Scatter(x=random_x, y=random_y, mode='markers', marker=marker_style)
data = [scatter_trace]

In [6]:
# Plot the trace list on its own; no layout is passed at this point
pyo.iplot(data)

#### Let's create a Layout for the Scatter plot

In [7]:
# Layout settings: plot title, a title for each axis, and 'closest' hover mode
layout_options = {
    'title': 'Random Number Plot',
    'xaxis': {'title': 'Random X axis'},
    'yaxis': {'title': 'Random Y axis'},
    'hovermode': 'closest',
}
layout = go.Layout(**layout_options)

#### Create a Figure object that combines the data and the Layout into one plot


In [8]:
# Bundle the trace list and the layout into a single Figure
figure = go.Figure(data=data, layout=layout)

In [9]:
# Plot the Figure, which carries both the traces and the layout
pyo.iplot(figure)

## Generating random Scatter Plot using plotly.express
- It is a wrapper over graph_objs that makes it simpler to generate plots.
- You pass in the DataFrame and use the function's parameters to define the plot.

In [10]:
import plotly.express as px

In [11]:
# Put the two random samples side by side as columns X and Y, then preview the frame
df = pd.DataFrame(dict(X=random_x, Y=random_y))
df.head()

Unnamed: 0,X,Y
0,92,13
1,83,24
2,24,68
3,84,30
4,32,70


In [100]:
# Scatter of column Y against column X with custom axis labels, sized 700 x 500
random_scatter = px.scatter(
    df,
    x='X', y='Y',
    title='Random Number Plot',
    labels={'X': 'Random X', 'Y': 'Random Y'},
    width=700, height=500,
)
random_scatter

## Plotting World Stats using graph_objs
 - year vs population plot

In [65]:
# Load the world statistics and keep only the rows for the three countries of interest
selected_countries = ['India', 'Australia', 'China']
world_stats = pd.read_csv('worldstats.csv')
is_selected = world_stats['country'].isin(selected_countries)
world_stats = world_stats[is_selected]
world_stats.head(3)

Unnamed: 0,country,year,Population,GDP
2607,Australia,2015,23781169.0,1339539000000.0
2608,Australia,2014,23464086.0,1454675000000.0
2609,Australia,2013,23117353.0,1563951000000.0


In [66]:
# Each country needs its own number so that it can be given its own colour;
# number the distinct countries 0, 1, 2, ...
{country: code for code, country in enumerate(world_stats['country'].unique())}

{'Australia': 0, 'China': 1, 'India': 2}

In [85]:
# Give every country its own numeric colour value, spread evenly over 100..500.
countries = world_stats['country'].unique()
country_colors = dict(zip(countries, np.linspace(100, 500, len(countries))))

worldData = [
    go.Scatter(
        x = world_stats['year'],
        y = world_stats['Population'],
        mode = 'markers',
        # .map() builds a float Series straight from the lookup table, whereas
        # .replace() on a string column relies on pandas' deprecated implicit
        # object -> numeric downcast.
        marker_color = world_stats['country'].map(country_colors),
        hovertext = world_stats['country'])
]

In [86]:
# Layout settings for the population plot: plot title and a title for each axis
world_layout_options = {
    'title': 'World Population Stats',
    'xaxis': {'title': 'Year'},
    'yaxis': {'title': 'Population'},
}
worldLayout = go.Layout(**world_layout_options)

In [87]:
# Bundle the population traces and their layout into a single Figure
worldFig = go.Figure(data=worldData, layout=worldLayout)

In [88]:
# Display the population figure
worldFig.show()

## Plotting World Stats using plotly express

In [56]:
# Read the CSV again: the graph_objs section reduced world_stats to three countries,
# and this section starts from the full table.
world_stats = pd.read_csv('worldstats.csv')
world_stats.head(n=3)

Unnamed: 0,country,year,Population,GDP
0,Arab World,2015,392022276.0,2530102000000.0
1,Arab World,2014,384222592.0,2873600000000.0
2,Arab World,2013,376504253.0,2846994000000.0


In [96]:
# Population by year for India and Australia, one colour per country,
# with the x axis limited to 1980-2010.
px.scatter(
    data_frame = world_stats[world_stats['country'].isin(['India', 'Australia'])],
    x = 'year',
    y = 'Population',
    title = 'Population',
    color = 'country',
    # Keys of `labels` must be column names of the frame ('year', 'Population');
    # the previous 'X'/'Y' keys matched no column, so they had no effect.
    labels = {'year' : 'Year', 'Population' : 'Population'},
    range_x = [1980, 2010],
    hover_name = 'country',
)

In [91]:
# Use of the facet_col parameter: same population plot as before, but split into
# one panel per value of the 'country' column.
px.scatter(
    data_frame = world_stats[world_stats['country'].isin(['India', 'Australia'])],
    x = 'year',
    y = 'Population',
    title = 'Population',
    color = 'country',
    # Keys of `labels` must be column names of the frame ('year', 'Population');
    # the previous 'X'/'Y' keys matched no column, so they had no effect.
    labels = {'year' : 'Year', 'Population' : 'Population'},
    range_x = [1980, 2010],
    hover_name = 'country',
    facet_col = 'country'  # Used to split the graph into parts based on column values
)