# Installing necessary packages for data analysis and visualization

In [None]:
!pip install rpy2
!pip install joypy



# Importing libraries for data manipulation, visualization, and R integration

In [None]:
import rpy2.robjects as ro
from rpy2.robjects import pandas2ri
from rpy2.robjects.conversion import localconverter
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from joypy import joyplot

# Converting an R data frame from RDS format to a Pandas DataFrame and saving it as a CSV file

In [None]:


# Paths of the input RDS file and of the CSV file generated from it
rds_file_path = '/content/lincoln_temps.rds'
csv_file_path = '/content/lincoln_temps.csv'

# Load the RDS file using R's readRDS function. No global conversion is
# switched on (the old `pandas2ri.activate()` call is deprecated in current
# rpy2 releases), so the result stays an R object until it is converted
# explicitly below.
read_rds = ro.r['readRDS']
df_r = read_rds(rds_file_path)

# Convert the R data frame to a pandas DataFrame. The pandas conversion rules
# are only active inside this `with` block, and `get_conversion()` returns the
# converter that is active in that context.
with localconverter(ro.default_converter + pandas2ri.converter):
    df_pandas = ro.conversion.get_conversion().rpy2py(df_r)

# Save the pandas DataFrame as CSV; the row index is not written
df_pandas.to_csv(csv_file_path, index=False)


# Load the CSV file into a DataFrame

In [None]:
# Read the CSV written by the conversion cell into the DataFrame used by
# every plot below
df = pd.read_csv('/content/lincoln_temps.csv')

# Only the last expression of a cell is rendered automatically, so a bare
# `df.head()` in the middle of the cell would show nothing; display it
# explicitly.
display(df.head())

# Print column names, dtypes and non-null counts
df.info()

# Example 1: Boxplot
Creating a boxplot to visualize the distribution of mean temperatures across different months.

In [None]:
# Default seaborn boxplot: one box of `mean_temp` values per `month_long` category
sns.boxplot(data=df, x='month_long', y='mean_temp')


# Enhancing the Visualization of Mean Temperature Distribution by Month

In [None]:


# One sky-blue box per month, drawn from the temperature DataFrame `df`
sns.boxplot(data=df, x='month_long', y='mean_temp', color="skyblue")

# Turn the month names on the x-axis by 90 degrees so they do not overlap
plt.xticks(rotation=90)
# Re-fit the layout so the rotated labels stay inside the figure
plt.tight_layout()
plt.show()


In [None]:

# Sky-blue boxplot of mean temperature for each month in `df`;
# seaborn returns the Axes it drew on
month_ax = sns.boxplot(data=df, x='month_long', y='mean_temp', color="skyblue")

# Title and axis labels, set in a single call on that Axes
month_ax.set(
    title='Mean Temperature by Month in Lincoln',
    xlabel='Month',
    ylabel='Mean Temperature',
)

# Tilt the month names by 45 degrees so neighbouring labels do not collide
plt.xticks(rotation=45)

# Re-fit the layout so the tilted labels are not clipped
plt.tight_layout()
plt.show()


# Rotate the month names vertically for better readability

In [None]:

# Monthly boxplot of mean temperature, all boxes in sky blue
sns.boxplot(data=df, x='month_long', y='mean_temp', color="skyblue")

# Describe the figure: title first, then both axis labels
plt.title('Mean Temperature by Month in Lincoln')
plt.xlabel('Month')
plt.ylabel('Mean Temperature')

# Print the month names vertically along the x-axis
plt.xticks(rotation='vertical')

# Re-fit the layout so the vertical labels are not cut off
plt.tight_layout()
plt.show()


# Set new labels directly, assuming there are 12 months and they are all represented

In [None]:
# Abbreviated month names used as replacement tick labels
month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# Larger canvas so twelve boxes have room
plt.figure(figsize=(10,8))

# Monthly boxplot of mean temperature from `df`
sns.boxplot(data=df, x='month_long', y='mean_temp', color="skyblue")

# Replace the tick labels by position: tick i receives month_labels[i].
# This relies on the plot having exactly 12 categories in January..December order.
tick_positions = range(len(month_labels))
plt.xticks(tick_positions, month_labels)

# Title and axis labels
plt.title('Mean Temperature by Month in Lincoln')
plt.xlabel('Month')
plt.ylabel('Mean Temperature')
plt.show()


# Styling and Customizing a Boxplot for Mean Temperature Distribution by Month

In [None]:
# Switch matplotlib to the 'fivethirtyeight' style sheet before the figure is
# created. `plt.style.use` changes the global settings, so the style stays
# active for later cells until another style or theme replaces it.
plt.style.use('fivethirtyeight')

# Abbreviated month names used as replacement tick labels
month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

plt.figure(figsize=(10,8))

# Monthly boxplot of mean temperature from `df`
sns.boxplot(data=df, x='month_long', y='mean_temp', color="skyblue")

# Relabel the ticks by position; relies on 12 categories in calendar order
tick_positions = range(len(month_labels))
plt.xticks(tick_positions, month_labels)

# Title and axis labels
plt.title('Mean Temperature by Month in Lincoln')
plt.xlabel('Month')
plt.ylabel('Mean Temperature')
plt.show()


In [None]:
# Set the seaborn theme BEFORE creating the figure: a figure picks up
# figure-level settings (such as its background colour) when it is created,
# so creating it first would leave it with whatever style an earlier cell
# had activated.
sns.set_theme(style="darkgrid")
plt.figure(figsize=(10,8))

# Define abbreviated month names as labels
month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# Create the boxplot of mean temperature per month
sns.boxplot(x='month_long', y='mean_temp', data=df, color="skyblue")

# Set custom x-axis tick labels by position: tick i receives month_labels[i].
# This relies on the plot having exactly 12 categories in calendar order.
plt.xticks(range(len(month_labels)), month_labels)

# Customize the text sizes
plt.title('Mean Temperature by Month in Lincoln', fontsize=16)  # Title with custom font size
plt.xlabel('Month', fontsize=14)  # x-axis label
plt.ylabel('Mean Temperature', fontsize=14)  # y-axis label
plt.tick_params(labelsize=12)  # Tick label size on both axes

plt.show()



In [None]:
import matplotlib.style

# Names of all style sheets that can be passed to `plt.style.use(...)`;
# the bare name on the last line makes the notebook display the list
available_styles = matplotlib.style.available
available_styles

# Examples: Violins

In [None]:
# Violin plot of mean temperature per month; `inner=None` leaves the violins
# empty (no box or quartile marks drawn inside them)
violin_ax = sns.violinplot(data=df, x='month_long', y='mean_temp', inner=None, color="skyblue")

# Title and axis labels, set in a single call on the Axes seaborn returned
violin_ax.set(
    title='Distribution of Mean Temperatures by Month in Lincoln',
    xlabel='Month',
    ylabel='Mean Temperature',
)

# Tilt the month names by 45 degrees to keep them readable
plt.xticks(rotation=45)
plt.show()

# Create the strip plot

In [None]:
# Strip plot: one dot per observation. With jitter switched off, all dots of
# a month sit on the same vertical line.
sns.stripplot(data=df, x='month_long', y='mean_temp', jitter=False, color="skyblue")
plt.show()


Whether adding more jitter to a plot is good or bad depends on the context of your data visualization needs:

- **Good**: More jitter can be beneficial when you have a large number of overlapping data points within categories. It spreads out the points more significantly, making it easier to visually distinguish between them and to observe the density and distribution of data points within each category.

- **Bad**: However, adding too much jitter can also distort the visual representation of your data. It may spread the points too far from their actual categories, making it harder to associate them with the correct category and potentially misleading the interpretation of the data's actual distribution.

In summary, the optimal amount of jitter should be determined based on the specific requirements of your visualization and the characteristics of your dataset. It's about finding a balance that improves readability without compromising the integrity of the data's representation.


In [None]:


# White background with grid lines
sns.set_theme(style="whitegrid")

# Strip plot without jitter: the dots of each month are stacked on one line
sns.stripplot(data=df, x='month_long', y='mean_temp', jitter=False, color="skyblue")

# Replace the tick labels by position with the abbreviated names in
# `month_labels` (defined in an earlier cell)
tick_positions = range(len(month_labels))
plt.xticks(tick_positions, month_labels)

# Title and axis labels
plt.title('Mean Temperature by Month in Lincoln (No Jitter)')
plt.xlabel('Month')
plt.ylabel('Mean Temperature')

plt.show()


In [None]:
# White background with grid lines
sns.set_theme(style="whitegrid")

# Strip plot with seaborn's default jitter: dots are spread sideways within
# each month so overlapping observations become visible
sns.stripplot(data=df, x='month_long', y='mean_temp', jitter=True, color="skyblue")

# Replace the tick labels by position with the abbreviated names in
# `month_labels` (defined in an earlier cell)
tick_positions = range(len(month_labels))
plt.xticks(tick_positions, month_labels)

# Title and axis labels
plt.title('Mean Temperature by Month in Lincoln (Jitter)')
plt.xlabel('Month')
plt.ylabel('Mean Temperature')

plt.show()

In [None]:
# Strip plot with an explicit jitter amount of 0.2 instead of the default
sns.stripplot(data=df, x='month_long', y='mean_temp', jitter=0.2, color="skyblue")

# Tilt the month names by 45 degrees to keep them readable
plt.xticks(rotation=45)
plt.show()

In [None]:
# Set the background style. `sns.set_theme` is the preferred name (`sns.set`
# is only an alias for it) and matches the other cells of this notebook.
sns.set_theme(style="whitegrid")

# Create a violin plot; `inner=None` keeps the violins empty so the dots
# drawn next remain visible
sns.violinplot(x='month_long', y='mean_temp', data=df, inner=None, color="skyblue")

# Overlay the individual observations as small black dots, jittered sideways
# so that they spread out inside each violin
sns.stripplot(x='month_long', y='mean_temp', data=df, jitter=0.25, size=3, color='black', linewidth=0)

plt.title('Sina-like Plot Example')
# Replace the tick labels by position with the abbreviated names in
# `month_labels` (defined in an earlier cell)
plt.xticks(range(len(month_labels)), month_labels)
plt.show()


# Examples: Ridgeline plot

### Creating a Ridgeline Plot to Visualize Temperature Distribution by Month

The following code snippet demonstrates how to create a Ridgeline plot using the `joyplot` function from the `joypy` library. This visualization technique is particularly effective for displaying the distribution and density of a numerical variable (`mean_temp`) across different categories (`month_long`). By selecting the relevant columns from the DataFrame and specifying these as parameters in the `joyplot` function, we can generate a comprehensive view of how mean temperatures vary month by month.


In [None]:
# Select the two columns the ridgeline plot needs. `month_long` is turned into
# a categorical whose categories are listed in the order in which the months
# first appear in `df`, so that grouping by it keeps that order instead of
# sorting the month names alphabetically.
# NOTE(review): assumes the rows of `df` are in calendar order, and that joypy
# groups with a sorting pandas groupby - confirm both.
data = df[['mean_temp', 'month_long']].assign(
    month_long=lambda frame: pd.Categorical(
        frame['month_long'],
        categories=frame['month_long'].dropna().unique(),
    )
)

# Create the Ridgeline plot from the selected columns (previously the full
# `df` was passed and `data` was left unused)
fig, axes = joyplot(
    data,
    by='month_long',  # This groups data by month: one ridge per month
    column='mean_temp',  # This specifies which column to plot densities for
)
plt.show()


In [None]:
# Options for the styled ridgeline plot, collected in one place
ridgeline_options = dict(
    by='month_long',          # one ridge per month
    column='mean_temp',       # column whose density is drawn in each ridge
    title="Ridgeline Plot of Mean Temperature by Month in Lincoln",
    figsize=(12, 8),          # overall size of the figure
    colormap=plt.cm.viridis,  # colour map used to colour the ridges
    alpha=0.75,               # transparency of the filled areas
    xlabelsize=12,            # font size of the x-axis labels
    ylabelsize=12,            # font size of the y-axis labels
)

# Draw the ridgeline plot from `data` (the two-column frame built in the
# previous cell)
fig, axes = joyplot(data, **ridgeline_options)

plt.show()
