# Importing all required packages

In [None]:
import plotly.figure_factory as ff
import plotly.express as px
import plotly.io as pio
import seaborn as sns
from ctypes import PyDLL
from _plotly_utils.basevalidators import DataArrayValidator
import plotly.graph_objects as go
import plotly.io as pio
import pandas as pd
import numpy as np

# Make "plotly_dark" the default plotly template for figures created after this point.
pio.templates.default = "plotly_dark"

# Dataset 1 : Gapminder

In [None]:
# Load the Gapminder sample dataset that ships with plotly express.
dataset1 = px.data.gapminder()

## Bar Chart

In [None]:
# Bar chart of India's population per Gapminder year, coloured by life expectancy.
#
# The filtered rows live in an ordinary variable. Writing them to
# `dataset1.modified` would set an attribute on the DataFrame (not a column),
# which makes pandas emit a UserWarning. The already-loaded `dataset1` is
# reused instead of loading the Gapminder data a second time.
gapminder_india = dataset1.query("country == 'India'")

fig = px.bar(
    gapminder_india,
    x='year',
    y='pop',
    hover_data=['lifeExp', 'gdpPercap'],   # extra fields shown in the hover tooltip
    color='lifeExp',
    labels={'pop': 'Population of India'},
    height=400,
)
fig.show()

  """Entry point for launching an IPython kernel.


## Sunburst

In [None]:
# Sunburst of GDP per capita by continent and country.
#
# gdpPercap is summed over every row (i.e. every year) of each
# (continent, country) pair. The aggregate is kept in a regular variable:
# assigning it to `dataset1.modified2` would set an attribute on the DataFrame
# and trigger pandas' "columns cannot be created via a new attribute name"
# UserWarning.
gdp_by_country = (
    dataset1
    .groupby(['continent', 'country'], as_index=False)['gdpPercap']
    .sum()
)

# `path` lists only the categorical hierarchy. The numeric total goes in
# `values` so that it sizes the sectors; listing it in `path` would instead add
# a third ring made of one equally-sized sector per distinct number.
sunburst = px.sunburst(
    gdp_by_country,
    path=['continent', 'country'],
    values='gdpPercap',
)
sunburst.show()


# Dataset 2 : Coffee

In [None]:
# Read the CoffeeAndCodeLT2018 CSV from GitHub; the URL points at a fixed commit of the repository.
coffee = pd.read_csv('https://github.com/Aparna-K28/Datasets/blob/78225c70a83c21b3bd3a12b68cec855d18db32f6/CoffeeAndCodeLT2018%20-%20CoffeeAndCodeLT2018.csv?raw=true')
# Bare last expression so the notebook renders the loaded frame.
coffee

Unnamed: 0,CodingHours,CoffeeCupsPerDay,CoffeeTime,CodingWithoutCoffee,CoffeeType,CoffeeSolveBugs,Gender,Country,AgeRange
0,8,2,Before coding,Yes,Caffè latte,Sometimes,Female,Lebanon,18 to 29
1,3,2,Before coding,Yes,Americano,Yes,Female,Lebanon,30 to 39
2,5,3,While coding,No,Nescafe,Yes,Female,Lebanon,18 to 29
3,8,2,Before coding,No,Nescafe,Yes,Male,Lebanon,
4,10,3,While coding,Sometimes,Turkish,No,Male,Lebanon,18 to 29
...,...,...,...,...,...,...,...,...,...
95,6,2,Before coding,Yes,Nescafe,Yes,Male,Lebanon,18 to 29
96,4,1,Before coding,Sometimes,Nescafe,Sometimes,Female,Lebanon,18 to 29
97,10,3,Before coding,Yes,Cappuccino,Yes,Male,Lebanon,Under 18
98,2,2,While coding,Sometimes,Espresso (Short Black),Sometimes,Female,Lebanon,18 to 29


## Sankey Diagram

In [None]:
# Sankey diagram: total coding hours flowing from each coffee type to the
# respondents' "CoffeeSolveBugs" answer.

# Leave out the espresso variants; both spellings of "Espresso (Short Black)"
# (with and without the trailing space) are listed.
espresso_types = ["Espresso (Short Black) ", "Double Espresso (Doppio)", "Espresso (Short Black)"]
coffee_no_espresso = coffee[~coffee["CoffeeType"].isin(espresso_types)]

# Pick CodingHours *before* aggregating so the sum never touches the
# unrelated (mostly text) columns.
coffee_grouped = (
    coffee_no_espresso
    .groupby(["CoffeeSolveBugs", "CoffeeType"], as_index=False)["CodingHours"]
    .sum()
)

# One node per distinct label: answers first, coffee types after them.
# Each side gets its own lookup table, so a label that happened to appear on
# both sides would still map to two separate nodes.
answer_labels = coffee_grouped["CoffeeSolveBugs"].unique().tolist()
type_labels = coffee_grouped["CoffeeType"].unique().tolist()
node_labels = answer_labels + type_labels

answer_node = {label: position for position, label in enumerate(answer_labels)}
type_node = {label: len(answer_labels) + position for position, label in enumerate(type_labels)}

# One link per grouped row, running coffee type -> answer.
link_sources = coffee_grouped["CoffeeType"].map(type_node).tolist()
link_targets = coffee_grouped["CoffeeSolveBugs"].map(answer_node).tolist()

fig = go.Figure(data=[go.Sankey(
    # Define nodes
    node=dict(
        label=node_labels,
        color="blue",
    ),
    # Add links
    link=dict(
        source=link_sources,
        target=link_targets,
        value=coffee_grouped["CodingHours"],
    ),
)])

fig.update_layout(title_text="Sankey Diagram For Coffee Dataset",
                  font_size=10)
fig.show()


## Treemap

In [None]:
# Treemap of coffee cups per day, nested by gender, the "CoffeeSolveBugs"
# answer and coffee time, coloured by coding hours.
#
# The constant "coffee" column exists only to give the treemap a single root
# node. It is added to a copy via `assign`, so the shared `coffee` frame used
# by other cells is not mutated.
coffee_with_root = coffee.assign(coffee="coffee")

coffee_treemap = px.treemap(
    coffee_with_root,
    path=['coffee', 'Gender', 'CoffeeSolveBugs', 'CoffeeTime'],
    values='CoffeeCupsPerDay',
    color='CodingHours',
    color_continuous_scale='RdBu',
)
coffee_treemap.show()

# Dataset 3 : HDI

In [None]:
# Read the HDI CSV from GitHub; the URL points at a fixed commit of the repository.
hdi = pd.read_csv('https://github.com/Aparna-K28/Datasets/blob/04c2406b9b7db7c28dee013bae0c0040f73032bf/hdi%20(1).csv?raw=true')


## Scatterplot

In [None]:
# Scatter of the "1990" column against the "2019" column, with an OLS
# trendline and each point labelled from "Country_name".
fig = px.scatter(
    data_frame=hdi,
    x="1990",
    y="2019",
    text='Country_name',
    trendline="ols",
)
fig.show()


pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.



## Histogram

In [None]:
# One 20-bin histogram per year column, shown in order (1990 first, then 2019).
for year_column in ("1990", "2019"):
    fig = px.histogram(hdi, x=year_column, nbins=20)
    fig.show()


# Dataset 4 : Salary

In [None]:
# Read Salaries.csv from GitHub; the URL points at a fixed commit of the repository.
dataset4 = pd.read_csv('https://github.com/Aparna-K28/Datasets/blob/78225c70a83c21b3bd3a12b68cec855d18db32f6/Salaries.csv?raw=true')

## Parallel Coordinates

In [None]:
# Parallel-coordinates plot of sex, years of service, years since PhD and salary.
#
# The plotting frame is a regular variable built without mutation:
#   * assigning to `dataset4.modified` would set an attribute on the DataFrame
#     and trigger pandas' attribute-access UserWarning;
#   * `frame['sex'].replace(..., inplace=True)` acts on a temporary column
#     selection, which warns and, with Copy-on-Write enabled, leaves the frame
#     unchanged.
# 'sex' is encoded as 0 (Male) / 1 (Female). Any other value becomes NaN, which
# keeps the column numeric.
salary_numeric = (
    dataset4[['sex', 'yrs.service', 'yrs.since.phd', 'salary']]
    .assign(sex=lambda frame: frame['sex'].map({'Male': 0, 'Female': 1}))
)

fig = px.parallel_coordinates(
    salary_numeric,
    color="yrs.service",
    labels={
        "sex": "Sex",
        "yrs.service": "Years of Service",
        "yrs.since.phd": "Years Since Ph.D",
        # Label keys are column names, so this must be the lowercase 'salary'.
        "salary": "Salary",
    },
    color_continuous_scale=px.colors.diverging.Tealrose,
    # NOTE(review): a midpoint of 2 on a years-of-service colour scale looks
    # arbitrary - confirm the intended value.
    color_continuous_midpoint=2,
)
fig.show()

## Sunburst

In [None]:
# Sunburst of the salary data nested sex -> discipline -> rank; no `values`
# column is supplied.
salary_hierarchy = ['sex', 'discipline', 'rank']
sunburst = px.sunburst(data_frame=dataset4, path=salary_hierarchy)
sunburst.show()