In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Apply matplotlib's "fivethirtyeight" style sheet to every figure drawn below.
plt.style.use("fivethirtyeight")

In [None]:
# "tips" is one of the example datasets bundled with seaborn.
df = sns.load_dataset("tips")
df.head()

#### making scatterplot and countplot in seaborn

In [None]:

# Scatter plot of tip against total bill.
sns.scatterplot(
    data=df,
    x="total_bill",
    y="tip",
)
plt.show()

In [None]:
# Count plot: one bar per value of "sex".
sns.countplot(
    data=df,
    x="sex",
)
plt.show()

### Using hue in plot


In [None]:
# Same scatter plot, with the points coloured by the "smoker" column.
sns.scatterplot(
    data=df,
    x="total_bill",
    y="tip",
    hue="smoker",
)
plt.show()
# The legend order can be changed with hue_order, e.g. hue_order=["No", "Yes"]
# to list "No" before "Yes".

In [None]:
# Count plot of "sex", with each bar split by the "smoker" column.
sns.countplot(
    data=df,
    x="sex",
    hue="smoker",
)
plt.show()

In [None]:
# Specifying hue colours: map each value of "smoker" to a colour name
# and hand the mapping to the plot through "palette".
hue_colors = dict(Yes="black", No="red")

sns.scatterplot(
    data=df,
    x="total_bill",
    y="tip",
    hue="smoker",
    palette=hue_colors,
)
plt.show()

### Introducing relplot()

To do this, we're going to introduce a new Seaborn function: "relplot()". "relplot()" stands for "relational plot" and enables you to visualize the relationship between two quantitative variables using either scatter plots or line plots. You've already seen scatter plots, and you'll learn about line plots later in this chapter. Using "relplot()" gives us a big advantage: the ability to create subplots in a single figure. Because of this advantage, we'll be using "relplot()" instead of "scatterplot()" for the rest of the course

In [None]:
# The same scatter plot, drawn through relplot() with kind="scatter".
sns.relplot(
    data=df,
    x="total_bill",
    y="tip",
    kind="scatter",
)


###### Subplots in columns

By setting "col" equal to "smoker", we get a separate scatter plot for smokers and non-smokers, arranged horizontally in columns

In [None]:
# One subplot per value of "smoker", laid out side by side in columns.
sns.relplot(
    data=df,
    x="total_bill",
    y="tip",
    kind="scatter",
    col="smoker",
)


######  Subplots in rows

If you want to arrange these vertically in rows instead, you can use the "row" parameter instead of "col".

In [None]:
# One subplot per value of "smoker", stacked vertically in rows.
sns.relplot(
    data=df,
    x="total_bill",
    y="tip",
    kind="scatter",
    row="smoker",
)


###### Subplots in rows and columns

It is possible to use both "col" and "row" at the same time. Here, we set "col" equal to smoking status and "row" equal to the time of day (lunch or dinner). Now we have a subplot for each combination of these two categorical variables

In [None]:
# Grid of subplots: columns by "smoker", rows by "time".
sns.relplot(
    data=df,
    x="total_bill",
    y="tip",
    kind="scatter",
    col="smoker",
    row="time",
)


In [None]:
# One subplot per day; col_wrap=4 allows up to 4 subplots per row before wrapping.
sns.relplot(
    data=df,
    x="total_bill",
    y="tip",
    kind="scatter",
    col="day",
    col_wrap=4,
)

# The order of the subplots can be changed with "col_order".
# The entries must match the values of the "day" column exactly (they are
# capitalised: "Thur", "Fri", "Sat", "Sun"); labels that match no rows
# produce empty subplots.
sns.relplot(
    data=df,
    x="total_bill",
    y="tip",
    kind="scatter",
    col="day",
    col_wrap=4,
    col_order=["Sun", "Sat", "Thur", "Fri"],
)

# plt.show() takes no positional figure/module argument.
plt.show()

### Customizing scatter plots

Subgroups with point size

We want each point on the scatter plot to be sized based on the number of people in the group, with larger groups having bigger points on the plot. To do this, we'll set the "size" parameter equal to the variable name "size" from our dataset. 

In [None]:
# Scale each point by the "size" column of the dataset.
sns.relplot(
    data=df,
    x="total_bill",
    y="tip",
    kind="scatter",
    size="size",
)


This plot is a bit hard to read because all of the points are of the same color.

We can make it easier by using the "size" parameter in combination with the "hue" parameter. To do this, set "hue" equal to the variable name "size". Notice that because "size" is a quantitative variable, Seaborn will automatically color the points different shades of the same color instead of different colors per category value like we saw in previous plots. Now larger groups have both larger and darker points, which provides better contrast and makes the plot easier to read.

In [None]:
# Use the "size" column for both the point size and the point colour.
sns.relplot(
    data=df,
    x="total_bill",
    y="tip",
    kind="scatter",
    size="size",
    hue="size",
)


In [None]:
# Changing point style and transparency.
# "alpha" takes a value between 0 and 1 and sets the transparency of the
# points: 0 is completely transparent, 1 is completely non-transparent.
sns.relplot(
    data=df,
    x="total_bill",
    y="tip",
    kind="scatter",
    hue="smoker",
    style="smoker",
    alpha=0.6,
)

In [None]:
# Load seaborn's bundled "mpg" example dataset and preview the first rows.
mpg = sns.load_dataset("mpg")
mpg.head()

#### Introduction to line plots

What are line plots?

In Seaborn, we have two types of relational plots: scatter plots and line plots. While each point in a scatter plot is assumed to be an independent observation, line plots are the visualization of choice when we need to track the same thing over time

In [None]:
# Line plot of mpg against model year.
# plt and sns are already imported in the notebook's first cell, so the
# duplicate imports that used to sit here have been dropped.
sns.relplot(
    data=mpg,
    x="model_year",
    y="mpg",
    kind="line",
)
plt.show()

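The shaded area is the confidence interval. The line itself shows the mean mpg for each model year, taken over all cars regardless of their country of origin, and the shaded band shows the uncertainty around that mean.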

Replacing confidence interval with standard deviation

Instead of visualizing a confidence interval, we may want to see how varied the mpg values are across the different origins at a given point in time. To visualize this, set the "ci" parameter equal to the string "sd" to make the shaded area represent the standard deviation. (Note that the cell below passes `ci=None` instead, which turns the shaded area off altogether.)



In [None]:
# One line per origin, distinguished by both colour (hue) and line style.
#   ci=None       -> turn off the confidence interval
#   dashes=False  -> remove the dashed line styles
#   markers=True  -> enable a marker on each data point
sns.relplot(
    data=mpg,
    x="model_year",
    y="mpg",
    kind="line",
    hue="origin",
    style="origin",
    markers=True,
    dashes=False,
    ci=None,
)

### categorical plots(count plot,bar plot,box plot)

In [None]:
# Both CSV files are read by relative path, so they must be in the
# notebook's working directory.
survey_data = pd.read_csv("young-people-survey-responses.csv")
student_data = pd.read_csv("student-alcohol-consumption.csv")

In [None]:
# Count plot through catplot(): one bar per "Internet usage" category.
sns.catplot(
    data=survey_data,
    x="Internet usage",
    kind="count",
)
plt.show()

In [None]:
# Bar plot through catplot(): "Mathematics" on the y-axis for each "Gender".
sns.catplot(
    data=survey_data,
    x="Gender",
    y="Mathematics",
    kind="bar",
)


In [None]:
# Study-time categories, listed from lowest to highest.
category_order = [
    "<2 hours",
    "2 to 5 hours",
    "5 to 10 hours",
    ">10 hours",
]

# "order" fixes the left-to-right order of the bars; ci=None is passed as well.
sns.catplot(
    data=student_data,
    x="study_time",
    y="G3",
    kind="bar",
    order=category_order,
    ci=None,
)

plt.show()

##### box plot

In [None]:
# Box plot of total bill by time of day, with "Dinner" drawn before "Lunch".
g = sns.catplot(
    data=df,
    x="time",
    y="total_bill",
    kind="box",
    order=["Dinner", "Lunch"],
)

In [None]:
# Same box plot; sym="" is passed to remove the outliers from the boxes.
g = sns.catplot(
    data=df,
    x="time",
    y="total_bill",
    kind="box",
    order=["Dinner", "Lunch"],
    sym="",
)

Changing the whiskers using `whis`

By default, the whiskers extend to 1.5 times the interquartile range, or "IQR". The IQR is the range from the 25th to the 75th percentile of a distribution of data. If you want to change the way the whiskers in your box plot are defined, you can do this using the "whis" parameter. There are several options for changing the whiskers. You can change the range of the whiskers from 1.5 times the IQR (which is the default) to 2 times the IQR by setting "whis" equal to 2.0. Alternatively, you can have the whiskers define specific lower and upper percentiles by passing in a list of the lower and upper values. In this example, passing in "[5, 95]" will result in the lower whisker being drawn at the 5th percentile and the upper whisker being drawn at the 95th percentile. Finally, you may just want to draw the whiskers at the min and max values. You can do this by specifying the lower percentile as 0 and the upper percentile as 100.

In [None]:
# Box plot with the outliers removed (sym="") and the whiskers placed at the
# 0th and 100th percentiles, i.e. at the min and max values.
g = sns.catplot(
    data=df,
    x="time",
    y="total_bill",
    kind="box",
    order=["Dinner", "Lunch"],
    sym="",
    whis=[0, 100],
)

#### point plots

What are point plots?

Point plots show the mean of a quantitative variable for the observations in each category, plotted as a single point. This point plot uses the tips dataset and shows the average bill among smokers versus non-smokers. The vertical bars extending above and below the mean represent the 95% confidence intervals for that mean. Just like the confidence intervals we saw in line plots and bar plots, these confidence intervals show us the level of uncertainty we have about these mean estimates. Assuming our data is a random sample of some population, we can be 95% sure that the true population mean in each group lies within the confidence interval shown.

In [None]:
# Point plot of total bill for smokers versus non-smokers.
sns.catplot(
    data=df,
    x="smoker",
    y="total_bill",
    kind="point",
)

2. What are point plots?

Point plots show the mean of a quantitative variable for the observations in each category, plotted as a single point. This point plot uses the tips dataset and shows the average bill among smokers versus non-smokers. The vertical bars extending above and below the mean represent the 95% confidence intervals for that mean. Just like the confidence intervals we saw in line plots and bar plots, these confidence intervals show us the level of uncertainty we have about these mean estimates. Assuming our data is a random sample of some population, we can be 95% sure that the true population mean in each group lies within the confidence interval shown.

3. Point plots vs. line plots
You may be thinking: point plots look a lot like line plots. What's the difference?

4. Point plots vs. line plots

Both line plots and point plots show the mean of a quantitative variable and 95% confidence intervals for the mean. However, there is a key difference. Line plots are relational plots, so both the x- and y-axis are quantitative variables. In a point plot, one axis - usually the x-axis - is a categorical variable, making it a categorical plot.

5. Point plots vs. bar plots

You may also be thinking: point plots seem to show the same information as bar plots. For each category, both show the mean of a quantitative variable and the confidence intervals for those means. When should we use one over the other? Let's look at an example using data from the masculinity survey that we've seen in prior lessons.

6. Point plots vs. bar plots

This is a bar plot of the percent of men per age group surveyed who report thinking that it's important that others see them as masculine, with subgroups based on whether they report feeling masculine or not. This is the same information, represented as a point plot. In the point plot, it's easier to compare the heights of the subgroup points when they're stacked above each other. In the point plot, it's also easier to look at the differences in slope between the categories than it is to compare the heights of the bars between them.

### customizing seaborn plots 

In [None]:
# Figure style: "whitegrid" is used here; other styles are "ticks", "dark"
# and "darkgrid".
sns.set_style("whitegrid")

# Palette: first a named palette ...
sns.set_palette("RdBu")

# ... then a custom one given as a list of colours (colour hex codes work too).
# This second call is the one in effect for the plot below.
custom_palette = ["red", "green"]
sns.set_palette(custom_palette)

# Scale of the plot elements; from smallest to largest the options are
# "paper", "notebook", "talk" and "poster".
sns.set_context("notebook")

sns.countplot(
    data=df,
    x="sex",
)
plt.show()

###### adding title and labels

Before we go into the details of adding a title, we need to understand an underlying mechanism in Seaborn. Seaborn's plot functions create two different types of objects: FacetGrids and AxesSubplots. 

    object type       plot types                       characteristics                  how to add titles
        
    FacetGrids      relplot(),catplot()              can create subplots                 g.fig.suptitle()
    AxesSubplots    scatterplot(),countplot(),etc    only creates a single plot          g.set_title()

In [None]:

# Study-time categories in the order the bars should appear.
category_order = [
    "<2 hours",
    "2 to 5 hours",
    "5 to 10 hours",
    ">10 hours",
]

# Assign the plot to "g" so the FacetGrid can be customised afterwards.
g = sns.catplot(
    data=student_data,
    x="study_time",
    y="G3",
    kind="bar",
    order=category_order,
    ci=None,
)

# Title for the figure as a whole, set through g.fig.suptitle();
# "y" adjusts the height of the title.
g.fig.suptitle("New Title", y=1.05)

# Axis labels.
g.set(xlabel="Hello", ylabel="Namaste")

# Rotate the x-axis tick labels by 90 degrees.
plt.xticks(rotation=90)

plt.show()