# Visualisations

TEMPLATE - FILL IN WITH YOUR DATASET'S DETAILS

Importing Libraries:

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px  # Optional for interactive plots

# The code below is optional, but it improves the aesthetics of the plots:
# it applies one seaborn theme and a larger default figure size to every
# figure created afterwards.
sns.set_theme(style="whitegrid", palette="muted")
plt.rcParams['figure.figsize'] = (10, 6)

Loading Data:

In [None]:
# Read the dataset into a DataFrame.
# Make sure to adjust the path and filename as necessary.
df = pd.read_csv(filepath_or_buffer='../data/your_dataset.csv')

Quick look at the data:

In [None]:
# Only the value of the last bare expression in a cell is rendered, so each
# summary is printed explicitly to make all of them visible.
print(df.head())      # First few rows of the dataset
df.info()             # Data types and non-null counts (prints its own report)
print(df.describe())  # Summary statistics
print(df.columns)     # List of columns in the dataset

### Visualise Distributions:

Numerical columns:

In [None]:
# Creating a histogram and box plot for numeric columns

Categorical columns:

In [None]:
# Count plot

### Visualise Relationships:

Numerical vs numerical:

In [None]:
# Scatter plot of col1 against col2, with points coloured by target_col
sns.scatterplot(
    data=df,
    x='col1',
    y='col2',
    hue='target_col',
).set_title('Scatterplot of col1 vs col2')
plt.show()

# Regression plot for the same pair of columns
sns.regplot(
    data=df,
    x='col1',
    y='col2',
).set_title('Regression plot of col1 vs col2')
plt.show()


Categorical vs numerical:

In [None]:
# Box plot of numeric_col for each value of categorical_col
sns.boxplot(
    data=df,
    x='categorical_col',
    y='numeric_col',
).set_title('Boxplot of numeric_col by categorical_col')
plt.show()

# Violin plot of the same comparison
sns.violinplot(
    data=df,
    x='categorical_col',
    y='numeric_col',
).set_title('Violin plot of numeric_col by categorical_col')
plt.show()

Categorical vs categorical:

In [None]:
# Count how often each (cat1, cat2) combination occurs, then draw the counts
# as an annotated heatmap.
cross_tab = pd.crosstab(index=df['cat1'], columns=df['cat2'])
sns.heatmap(
    data=cross_tab,
    annot=True,
    fmt='d',
    cmap='Blues',
).set_title('Heatmap of cat1 vs cat2')
plt.show()

Correlation and heatmaps:

In [None]:
# Creating a correlation matrix and heatmap.
# Correlation is only computed over numeric columns: with text/categorical
# columns present, a bare df.corr() raises on pandas >= 2.0, so the frame is
# restricted to numeric dtypes first.
corr_matrix = df.select_dtypes(include='number').corr()
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', linewidths=0.5)
plt.title('Correlation Matrix')
plt.show()

Pair plots:

In [None]:
# Pair plot of the DataFrame's variables, coloured by target_col
sns.pairplot(
    data=df,
    hue='target_col',
)
plt.show()

Time series visualisations (if applicable):

In [None]:
# Parse the date column and make it the index.
# The guard keeps this cell re-runnable: once 'date' has become the index it
# is no longer a column, and repeating the conversion would raise KeyError.
if 'date' in df.columns:
    df['date'] = pd.to_datetime(df['date'])
    df.set_index('date', inplace=True)

# Static line plot of value_col against the date index
df['value_col'].plot()
plt.title('Time Series of value_col')
plt.show()

# Optional interactive
px.line(df, x=df.index, y='value_col', title='Interactive Time Series')

Saving plots:

In [None]:
# Save a figure from the same cell that draws it, and BEFORE plt.show().
# Once the figure has been shown (or the notebook cell has finished), the
# current figure is released, and a later plt.savefig() writes a blank image.
fig, ax = plt.subplots()
sns.scatterplot(x='col1', y='col2', data=df, ax=ax)  # Replace with the plot you want to save
fig.savefig('plot_name.png', dpi=300, bbox_inches='tight') # Enter your plot name in place of 'plot_name.png' to save it
plt.show()