# **Earthquake Analysis**

This notebook analyzes the `quakes` dataset in R.

Dataset: Quakes

Author: Utkarsh Tyagi

### Load required libraries

# Install only the packages that are not already available. Calling
# install.packages() unconditionally re-downloads everything on each run and
# fails when there is no network access or no CRAN mirror configured.
required_packages <- c(
  "skimr", "dplyr", "janitor", "here", "tidyverse", "tidyr"
)
is_installed <- vapply(
  required_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
missing_packages <- required_packages[!is_installed]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
library(tidyverse)
library(skimr)
library(dplyr)
library(janitor)
library(here)
library(tidyr)

### Check the structure of the data

# Load the built-in `quakes` data frame into the workspace.
data(quakes)

# View() opens a spreadsheet-style viewer and needs a graphical session, so
# only call it interactively; this keeps the script runnable with Rscript or
# on a headless machine, where View() would otherwise stop with an error.
if (interactive()) {
  View(quakes)
}

# First rows, column names, and column types of the raw data.
head(quakes)

colnames(quakes)

str(quakes)

### Clean the dataset
# Build the cleaned data frame used by the rest of the analysis:
#   1. rename `stations` to `Number_of_Stations`,
#   2. drop rows with a missing magnitude and rows that are entirely empty,
#   3. keep only events with a magnitude above 4.5,
#   4. sort by station count, largest first.
quakes_df <- quakes %>%
  rename(Number_of_Stations = stations) %>%
  drop_na(mag) %>%
  remove_empty(which = "rows") %>%
  filter(mag > 4.5) %>%
  arrange(desc(Number_of_Stations))


### Check the structure of the cleaned dataset
# Column types of the cleaned data.
str(quakes_df)

# First rows of the cleaned data.
head(quakes_df)

# Column names after the rename performed during cleaning.
colnames(quakes_df)

# Per-column summary statistics.
summary(quakes_df)

# Dimensions of the cleaned data, followed by the raw data for comparison,
# to show how many rows the cleaning step removed.
dim(quakes_df)

dim(quakes)

# Row and column counts of the cleaned data.
nrow(quakes_df)

ncol(quakes_df)


### Check the correlation between magnitude and number of stations
# Correlation between magnitude and the number of reporting stations
# (cor() defaults to the Pearson coefficient).
with(quakes_df, cor(mag, Number_of_Stations))


### Extend the cleaned dataset: add a magnitude level and an intensity index (the product of magnitude and number of stations)
# Add two derived columns in a single mutate() call:
#   - mag_level: "Medium" for magnitudes in [4.5, 5.5), "High" from 5.5 up;
#     any other value is left as NA because case_when() has no fallback here,
#   - Intensity_Index: magnitude multiplied by the number of stations.
quakes_df <- quakes_df %>%
  mutate(
    mag_level = case_when(
      mag >= 5.5 ~ "High",
      mag >= 4.5 & mag < 5.5 ~ "Medium"
    ),
    Intensity_Index = mag * Number_of_Stations
  )

### Visualize the cleaned data to gain insights
library(ggplot2)

# Smoothed trend of station count against magnitude, one curve per magnitude
# level. `linewidth` is used instead of `size`: ggplot2 3.4.0 deprecated
# `size` for line geoms, and passing it here triggers a deprecation warning.
ggplot(data = quakes_df, aes(x = mag, y = Number_of_Stations)) +
  geom_smooth(aes(color = mag_level), linewidth = 3) +
  labs(
    title = "Magnitude vs Number of Stations",
    x = "Magnitude",
    y = "Number of Stations"
  ) +
  theme_minimal()

# Distribution of magnitudes in the cleaned data, using bins 0.5 wide.
quakes_df %>%
  ggplot(aes(mag)) +
  geom_histogram(colour = "black", fill = "blue", binwidth = 0.5) +
  theme_minimal() +
  labs(
    y = "Frequency",
    x = "Magnitude",
    title = "Histogram of Magnitude"
  )

### Export the cleaned data to a new CSV file
# Write the cleaned data to CSV. The destination defaults to the original
# hard-coded location, but can be overridden by setting the QUAKES_OUTPUT_CSV
# environment variable so the script also runs on machines where that
# directory does not exist.
default_output_path <-
  "/Users/utkarshtyagi/Documents/VSC/Dataset/quakes_df.csv"
output_path <- Sys.getenv("QUAKES_OUTPUT_CSV", unset = "")
if (!nzchar(output_path)) {
  # Unset or empty override: fall back to the original destination.
  output_path <- default_output_path
}
write_csv(quakes_df, output_path)
