# Plotting

In [None]:
# Go-to styling for bars / histogram bins: light fill with a dark outline.
# Unpack it into a plotting call, e.g. plt.bar(x, heights, **bar_style).
bar_style = dict(color='skyblue', edgecolor='black')

### Basic things

In [None]:
# Switch the y-axis of the current axes to a logarithmic scale.
# NOTE(review): in this cell the call comes before any plt.figure(), so it
# targets whatever axes is current at that point -- confirm that is intended.
plt.yscale('log')


""" V1 """
# Labels are defined once so the title can be derived from them.
x_label, y_label = "Characters", "Total spoken length"

plt.figure(figsize=(8, 6))
plt.plot(depths, acuracy_scores, marker="o")

# Annotate the axes; x tick labels are turned vertical.
plt.title(f'{x_label} VS {y_label}')
plt.xlabel(x_label)
plt.ylabel(y_label)
plt.xticks(rotation=90)

# Fixed y window; x window padded by one unit on each side of the data.
plt.xlim(depths[0] - 1, depths[-1] + 1)
plt.ylim(0.9, 1.0)
plt.tight_layout()


""" V2 """
# Labels are defined once so the title can be derived from them.
x_label, y_label = "Characters", "Total spoken length"

plt.figure(figsize=(8, 6))
plt.plot(models, losses, label="Loss Curve", marker="o")

# Annotate the axes; x tick labels are turned vertical.
plt.title(f'{x_label} VS {y_label}')
plt.xlabel(x_label)
plt.ylabel(y_label)
plt.xticks(rotation=90)

# y window follows the data, padded by 0.1 below the minimum and above the maximum.
plt.ylim(bottom=min(losses) - 0.1, top=max(losses) + 0.1)
plt.grid(True)
plt.legend()
plt.tight_layout()

### Integer Grid

In [None]:
# y ticks at 0.0, 0.1, ..., 1.0
plt.yticks([step / 10 for step in range(11)])
# One x tick per entry, numbered from 1 (adjust the range to your data)
plt.xticks(range(1, 1 + len(precisions)))
# Thin dashed grid lines
plt.grid(True, which='both', linewidth=0.5, linestyle='--')

### Legend plot: `label="..."`

In [None]:
# Give every line a `label=`; plt.legend() then builds the legend from those labels.
plt.plot(t_train, train_acc_history, label="Train")
plt.plot(t_val, val_acc_history, label="Val")
...   # placeholder for further plotting calls
plt.legend()
...  # placeholder for the rest of the figure setup

### Normal plot


In [None]:
# Quick scatter plots straight from the DataFrame: pick the x and y columns by name.
baseball.plot(kind='scatter', x='hr', y='X2b')
baseball.plot(kind='scatter', x='ab', y='h')

### Histogram and Barplot

In [None]:
# Frequency distribution (histogram)
pd_term_frequency.hist(figsize=(8, 6), bins=20, edgecolor='black', color='skyblue')

plt.grid(True, alpha=0.7, linestyle='--')
plt.xlabel("Frequency", fontsize=14)
plt.ylabel("Count", fontsize=14)
plt.title("Term Frequency Distribution", fontsize=16)
plt.show()

# Mean of one variable grouped by another, drawn as bars
movies_by_genre = movies.groupby("Main_Genre")["imdb_rating"].mean()
sns.barplot(palette="viridis", x=movies_by_genre.index, y=movies_by_genre.values)

- Overlapping histograms

In [None]:
# Draw both density histograms (with KDE curves) on the same axes.
ax = sns.histplot(treated['re78'], label='treated', color='blue', stat='density', kde=True)
sns.histplot(control['re78'], label='control', color='orange', stat='density', kde=True, ax=ax)

ax.set_title('Income distribution comparison in 1978')
ax.set_xlabel('Income 1978')
ax.set_ylabel('Income density')
ax.legend()
plt.show()

### LOG LOG SCALE

In [None]:
# Histogram of the "Frequency" column on log-log axes, with 100 log-spaced
# bin edges from 10^0 to 10^6. The trailing `;` hides the returned object's repr.
corpus_frequency.plot(
    kind="hist",
    column=["Frequency"],
    bins=np.logspace(start=0, stop=6, num=100),
    loglog=True,
    title="Frequency histogram (loglog scale)",
);

### Plot with Error 
- Fill

In [None]:
# Upper / lower edge of the band: mean plus / minus one standard deviation.
movies_year["up_lim"] = movies_year["mean"].add(movies_year["std"])
movies_year["down_lim"] = movies_year["mean"].sub(movies_year["std"])

plt.figure(figsize=(8, 6))

# Shaded band first, then the mean line drawn over it.
plt.fill_between(
    movies_year.index,
    y1=movies_year["up_lim"],
    y2=movies_year["down_lim"],
    alpha=0.2,
)
plt.plot(movies_year["mean"], marker="o", color="orange")

plt.ylabel("Worldwide Gross ·10^9")
plt.xlabel("Year")
plt.tight_layout()

- Error bar

In [None]:
plt.figure(figsize=(8, 6))

# Mean as a connected line with markers ('-o'); std as capped vertical error bars.
plt.errorbar(
    x=movies_year.index,
    y=movies_year["mean"],
    yerr=movies_year["std"],
    capsize=5,
    fmt='-o',
)

plt.ylabel("Worldwide Gross ·10^9")
plt.xlabel("Year")
plt.tight_layout()

- Barplot with error bars

In [None]:
def plot_bar_group_with_error(group_by, feature, df, add_numbers = True, alpha_err = 0.35,
                              label_offset = 0.5):
    """Draw a bar chart of the per-group mean of `feature` with std error bars.

    Args:
        group_by: Column of `df` whose distinct values define the bars.
        feature: Column of `df` that is averaged (and whose std is taken)
            within each group.
        df: DataFrame containing both columns.
        add_numbers: If True, write each mean (two decimals) above its bar.
        alpha_err: Opacity of the error bars.
        label_offset: Vertical gap, in data units, between the top of a bar
            and its text label. The default reproduces the previously fixed
            gap of 0.5; adjust it to the scale of `feature`.

    Returns:
        None. The chart is drawn on a new 8x4 inch figure.
    """
    plt.figure(figsize=(8,4))
    grouped = df.groupby(group_by)[feature].agg(["mean", "std"]).reset_index()

    bars = plt.bar(
        grouped[group_by], grouped["mean"], yerr=grouped["std"],
        capsize=5, color='lightblue', edgecolor='black', error_kw={'alpha': alpha_err}
    )

    if add_numbers:
        for bar, mean in zip(bars, grouped["mean"]):
            plt.text(
                bar.get_x() + bar.get_width() / 2,   # horizontal centre of the bar
                bar.get_height() + label_offset,     # just above the bar top
                f'{mean:.2f}',
                ha='center', va='bottom'
            )

    plt.tight_layout()
    
# Example call: mean "Price" per "FuelType", remaining options left at their defaults.
plot_bar_group_with_error("FuelType", "Price", toyota_df)

In [None]:
columns = ["imdb_rating", "length", "worldwide_gross"]

# One panel per feature, side by side: mean per rank with std error bars.
plt.figure(figsize=(12,4))
for i, feature in enumerate(columns, 1):
    rank_year = movies.groupby("rank_in_year")[feature].agg(["mean", "std"]).reset_index()

    # Column count follows the list, so adding a feature needs no other change.
    plt.subplot(1, len(columns), i)
    plt.bar(rank_year["rank_in_year"], rank_year["mean"], yerr=rank_year["std"],
            capsize=5, color='lightblue', edgecolor='black')
    # Label each panel so the features can be told apart.
    plt.title(feature)
    plt.xlabel("rank_in_year")

# Adjust spacing once, after every panel exists.
plt.tight_layout()

### Stacked barplot

In [None]:
# Share of married (1) and not married (0) rows within each group.
treated_n = len(treated)
treated_marriage_1 = len(treated[treated["married"] == 1]) / treated_n
treated_marriage_0 = len(treated[treated["married"] == 0]) / treated_n

control_n = len(control)
control_marriage_1 = len(control[control["married"] == 1]) / control_n
control_marriage_0 = len(control[control["married"] == 0]) / control_n

# STACK BY COLUMNS NOT ROWS: each column is one stacked segment, each row is one bar.
# The index names the bars; without it the x ticks would read 0 and 1.
plot_df = {
    "married": [treated_marriage_1, control_marriage_1],
    "not_married": [treated_marriage_0, control_marriage_0],
}
plot_df = pd.DataFrame(plot_df, index=["treated", "control"])

# rot=0 keeps the two group names horizontal.
plot_df.plot(kind='bar', stacked=True, figsize=(14, 8), rot=0)
# Add title and labels
plt.title("Proportion of people married", fontsize=16)
plt.xlabel("Control or Treated", fontsize=14)
plt.ylabel("proportion", fontsize=14)
plt.legend(title="", fontsize=12)

# Show the figure
plt.tight_layout()
plt.show()

### Scatter plot

In [None]:
# Quick version: bare scatter, no figure setup or labels.
plt.scatter(x=CLEAN['Age'], y=CLEAN['Price'], edgecolors='k', alpha=0.6)

# Full version: own figure, with title and axis labels.
plt.figure(figsize=(9, 5))
plt.scatter(x=CLEAN['Age'], y=CLEAN['Price'], edgecolors='k', alpha=0.6)
plt.xlabel("Car Age (years)", fontsize=13)
plt.ylabel("Price (CHF)", fontsize=13)
plt.title("Car Age vs Price", fontsize=20)
plt.xticks(rotation=90)

plt.show()

#### Add correlation / regression line

In [None]:
# Scatter plot with a fitted regression line.
sns.lmplot(x='SelfEmployed',y='IncomePerCap', data=data_frame)
# Title and axis labels describe the columns actually plotted.
plt.title("Self-employment VS Income per capita")
plt.ylabel("Income per capita")
plt.xlabel("Self-employed")


Split the regression by the values of a categorical variable (`hue`)

In [None]:
# Same regression plot, with one colour and one fitted line per value of `State`.
sns.lmplot(x='SelfEmployed',y='IncomePerCap', data=data_frame, hue = 'State')
# Title and axis labels describe the columns actually plotted.
plt.title("Self-employment VS Income per capita")
plt.ylabel("Income per capita")
plt.xlabel("Self-employed")


### Multiplot scatter

In [None]:
# Keep only the three income columns.
df_corr = df.loc[:, ['re74', 're75', 're78']]

# Grid over every pair of columns: regression scatter off the diagonal, KDE on it.
g = sns.pairplot(data=df_corr, diag_kind='kde', kind='reg', aspect=1.5, height=2.5)
# y slightly above 1 lifts the figure title clear of the top row.
g.fig.suptitle("Correlation between Re74, Re75, and Re78", fontsize=16, y=1.02)

### Axis barh plot

In [None]:
# Figure taller than wide, then one horizontal bar per entry:
# `name` along the y axis, `value` as the bar length.
plt.subplots(figsize=(5, 7))
plt.barh(y=features_coef.name, width=features_coef.value, alpha=0.6)

## Multiplot

- plt.subplot(nrows, ncols, plot_index) -> plot_index starts at 1
- plt.figure(figsize=(wide, height))
- plt.tight_layout() -> avoid overlapping

In [None]:
# Genre x studio tables: mean gross and mean rating per combination.
heatmap_data_gross = movies.pivot_table(
    values='worldwide_gross', index='Main_Genre', columns='studio', aggfunc='mean'
)
heatmap_data_rating = movies.pivot_table(
    values='imdb_rating', index='Main_Genre', columns='studio', aggfunc='mean'
)

plt.figure(figsize=(12, 6))

# Left panel (1 row, 2 columns, slot 1)
sns.heatmap(heatmap_data_gross, cmap='YlGnBu', ax=plt.subplot(1, 2, 1))
plt.title("Heat map of Genre x Studio by gross")

# Right panel (1 row, 2 columns, slot 2)
sns.heatmap(heatmap_data_rating, cmap='YlGnBu', ax=plt.subplot(1, 2, 2))
plt.title("Heat map of Genre x Studio by rating")

plt.tight_layout()

## Seaborn
### Cool plottings, Join Plots (_Change "kind"_)

- Scatter plot + histogram
- Heatmap of elevation 
- Scatter plot with regression line

In [None]:
# Same x / y pair three times; only `kind` changes.
sns.jointplot(data=movies, x='worldwide_gross', y='imdb_rating', kind="hex")

sns.jointplot(kind="kde", data=movies, x='worldwide_gross', y='imdb_rating')

sns.jointplot(kind="reg", data=movies, x='worldwide_gross', y='imdb_rating')


### Heat map
First create a pivot table (rows x columns, each cell holding a value) and then plot it

In [None]:
# Three alternative ways to build the table; each assignment replaces the
# previous one, so keep only the variant you need.

# (a) pivot_table: aggregate a value column (aggfunc can be mean, sum, median, ...)
heatmap_data = movies.pivot_table(index='Main_Genre', columns='studio', values='worldwide_gross', aggfunc='mean')
# (b) crosstab without values: counts of each combination
heatmap_data = pd.crosstab(movies['Main_Genre'], movies['studio'])
# (c) crosstab with values + aggfunc: aggregate a value column per combination
heatmap_data = pd.crosstab(movies['Main_Genre'], movies['Genre_2'],
                           values=movies['worldwide_gross'], aggfunc='mean',
                           margins=False)



sns.heatmap(heatmap_data, cmap='YlGnBu')               # cmap = colour map
sns.heatmap(heatmap_data, annot=True, cmap='YlGnBu')   # annot=True writes the value in each cell