In [None]:
# Task 1
#Loading csv file
import pandas as pd

# Read the earthquake CSV; the 'Olus_Zamani' column is parsed into datetimes at load time
csv_path = 'deprem_son24saat_duzenli.csv'
df = pd.read_csv(csv_path, parse_dates=['Olus_Zamani'])

In [None]:
# Preview the top of the table as a quick sanity check of the load
print("First 5 rows of the dataset:")
preview_rows = df.head(5)
display(preview_rows)

In [None]:
 # Explore structure
print("\nDataset structure:")
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() appended a stray "None" line to the output.
df.info()

In [None]:
 # Check for missing values
print("\nMissing values:")
# Per-column count of missing entries
missing_per_column = df.isna().sum()
print(missing_per_column)

In [None]:
# Remove every row that has at least one missing field.
# The frame is modified in place, so `df` keeps referring to the same object.
df.dropna(axis=0, how='any', inplace=True)

In [None]:
# Time-derived features: the hour of each event and a coarse part-of-day label
df['Hour'] = df['Olus_Zamani'].dt.hour

# Left-closed intervals [0, 6), [6, 12), [12, 18), [18, 24) cover every hour 0-23
day_part_edges = [0, 6, 12, 18, 24]
day_part_names = ['Night', 'Morning', 'Afternoon', 'Evening']
df['Day_Part'] = pd.cut(
    df['Hour'],
    bins=day_part_edges,
    labels=day_part_names,
    right=False,
)

In [None]:
# Derive the main location from 'Yer': keep the text before the first '('
# and trim surrounding whitespace.
# No blanket try/except here: every later cell needs 'Main_Location', so a
# failure should stop the run at this point instead of being printed and ignored.
main_location = df['Yer'].str.split('(').str[0].str.strip()

# Entries left empty after the split are labelled 'MARMARA DENIZI'
# NOTE(review): this assumes nameless entries are all sea events -- confirm against the data.
df['Main_Location'] = main_location.replace('', 'MARMARA DENIZI')

print("\nDataset after cleaning and feature engineering:")
# info() prints its own report and returns None; print(df.info()) added a stray "None".
df.info()


In [None]:
# Task 2: Basic Data Analysis
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
# Summary statistics as produced by DataFrame.describe()
print("Basic statistics of numerical columns:")
numeric_summary = df.describe()
display(numeric_summary)

    

In [None]:
# Per-location summary: magnitude mean / max / count and mean depth,
# ordered from the highest to the lowest mean magnitude
print("\nMean of numerical columns by main location:")
location_agg_spec = {
    'Buyukluk': ['mean', 'max', 'count'],
    'Derinlik_km': 'mean',
}
per_location = df.groupby('Main_Location').agg(location_agg_spec)
location_stats = per_location.sort_values(by=('Buyukluk', 'mean'), ascending=False)
display(location_stats)

In [None]:
# Mean magnitude for each part of the day, strongest first.
# 'Day_Part' is categorical (built with pd.cut), so `observed` is passed explicitly:
# recent pandas versions warn when it is omitted, and the default is changing.
# observed=False keeps the current output, where a part of the day with no
# events appears with a NaN mean instead of being dropped.
print("\nMean magnitude by time of day:")
time_stats = (
    df.groupby('Day_Part', observed=False)['Buyukluk']
    .mean()
    .sort_values(ascending=False)
)
display(time_stats)


In [None]:
# Interesting findings, computed from the data rather than hard-coded, so the
# text always matches whichever CSV snapshot was loaded.
print("\nInteresting findings:")
if df.empty:
    # idxmax() raises on an empty frame, so report the situation instead
    print("No earthquake records available.")
else:
    strongest_event = df.loc[df['Buyukluk'].idxmax()]
    events_per_location = df['Main_Location'].value_counts()  # sorted, busiest first
    mean_depth_km = df['Derinlik_km'].mean()
    # observed=True: only parts of the day that actually contain events are compared
    magnitude_by_part = df.groupby('Day_Part', observed=True)['Buyukluk'].mean()

    print(f"1. The strongest earthquake was magnitude {strongest_event['Buyukluk']:.1f} "
          f"in the {strongest_event['Main_Location']} region.")
    print(f"2. Most earthquakes ({events_per_location.iloc[0]}) occurred "
          f"in the {events_per_location.index[0]} region.")
    print(f"3. The average depth of earthquakes is about {mean_depth_km:.1f} km.")
    print(f"4. Average magnitude is highest during the {magnitude_by_part.idxmax()} hours "
          f"({magnitude_by_part.max():.2f}).")

In [None]:
# Task 3: Data Visualization
import seaborn as sns
# Set style for better looking plots
sns.set(style="whitegrid")

# All four panels are built in ONE cell on explicit axes. With the default
# inline backend a figure is rendered and closed when its cell finishes, so
# creating the figure in one cell and calling plt.subplot(2, 2, k) in later
# cells produces four separate, mostly empty figures instead of a 2x2 grid.
fig, axes = plt.subplots(2, 2, figsize=(18, 12))
# ravel() is row-major: top-left, top-right, bottom-left, bottom-right
ax_line, ax_bar, ax_hist, ax_scatter = axes.ravel()

# 1. Line chart (earthquake magnitude over time)
# sort_index() puts the events in time order so the line never doubles back
df.set_index('Olus_Zamani')['Buyukluk'].sort_index().plot(
    ax=ax_line,
    title='Earthquake Magnitude Over Time',
    color='red',
    alpha=0.7
)
ax_line.set_xlabel('Time')
ax_line.set_ylabel('Magnitude')
ax_line.tick_params(axis='x', rotation=45)

# 2. Bar chart (average magnitude for the 5 locations with the most events)
top_locations = df['Main_Location'].value_counts().nlargest(5).index
df_filtered = df[df['Main_Location'].isin(top_locations)]
# NOTE(review): newer seaborn releases prefer errorbar=None over ci=None and
# warn about palette without hue; left unchanged to stay compatible with the
# installed version -- confirm which seaborn version this notebook targets.
sns.barplot(
    x='Main_Location',
    y='Buyukluk',
    data=df_filtered,
    estimator=np.mean,
    ci=None,
    palette='viridis',
    ax=ax_bar
)
ax_bar.set_title('Average Magnitude by Location (Top 5)')
ax_bar.set_xlabel('Location')
ax_bar.set_ylabel('Average Magnitude')
ax_bar.tick_params(axis='x', rotation=45)

# 3. Histogram (distribution of earthquake depths)
sns.histplot(
    data=df,
    x='Derinlik_km',
    bins=20,
    kde=True,
    color='blue',
    ax=ax_hist
)
ax_hist.set_title('Distribution of Earthquake Depths')
ax_hist.set_xlabel('Depth (km)')
ax_hist.set_ylabel('Frequency')

# 4. Scatter plot (depth vs magnitude, coloured by location)
sns.scatterplot(
    data=df,
    x='Derinlik_km',
    y='Buyukluk',
    hue='Main_Location',
    palette='viridis',
    alpha=0.7,
    ax=ax_scatter
)
ax_scatter.set_title('Depth vs Magnitude by Location')
ax_scatter.set_xlabel('Depth (km)')
ax_scatter.set_ylabel('Magnitude')
# Legend is anchored outside the axes so it does not cover the points
ax_scatter.legend(title='Location', bbox_to_anchor=(1.05, 1), loc='upper left')

fig.tight_layout()
plt.show()

In [None]:
# Extra figure: event positions plotted as longitude vs latitude,
# with both marker size and colour driven by magnitude
print("\nMap visualization of earthquake locations:")
map_fig, map_ax = plt.subplots(figsize=(10, 8))
sns.scatterplot(
    x='Boylam',
    y='Enlem',
    hue='Buyukluk',
    size='Buyukluk',
    sizes=(20, 200),
    palette='coolwarm',
    alpha=0.7,
    data=df,
    ax=map_ax,
)
map_ax.set_title('Geographical Distribution of Earthquakes (Size = Magnitude)')
map_ax.set_xlabel('Longitude')
map_ax.set_ylabel('Latitude')
# Legend sits outside the plotting area, to the right
map_ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
map_ax.grid(True)
plt.show()

# Extra figure: number of recorded events for each hour of the day
hour_fig, hour_ax = plt.subplots(figsize=(12, 6))
sns.countplot(x='Hour', data=df, color='skyblue', ax=hour_ax)
hour_ax.set_title('Hourly Distribution of Earthquakes')
hour_ax.set_xlabel('Hour of Day')
hour_ax.set_ylabel('Number of Earthquakes')
plt.show()