In [1]:
import pandas as pd

# Importing the CSV file
df = pd.read_csv('query_edited.csv')

print (df)

                          time   latitude   longitude     depth   mag  \
0     2023-11-07T06:20:27.627Z  31.656000 -104.315000    5.1385  1.80   
1     2023-11-07T06:05:57.453Z  57.453300 -156.518300   95.3000  1.80   
2     2023-11-07T06:01:38.598Z  60.330900 -152.133300  100.1000  2.10   
3     2023-11-07T05:53:22.960Z  17.978500  -66.908500   12.6500  2.38   
4     2023-11-07T05:52:33.912Z -23.153100  -67.889900  169.0080  4.20   
...                        ...        ...         ...       ...   ...   
4705  2023-10-08T00:39:56.690Z  38.686667 -122.498667    6.2600  1.84   
4706  2023-10-08T00:27:24.440Z  19.127001 -155.393829   31.6800  1.92   
4707  2023-10-08T00:22:55.530Z  19.251167 -155.405334   29.4000  2.10   
4708  2023-10-08T00:19:30.004Z  -5.577000  146.201200   81.5920  4.50   
4709  2023-10-08T00:17:47.340Z  17.913667  -66.923833   14.9800  2.73   

                                        place locationSource  
0                               western Texas             tx

In [2]:
# # Removing the rows with missing values
df.dropna


<bound method DataFrame.dropna of                           time   latitude   longitude     depth   mag  \
0     2023-11-07T06:20:27.627Z  31.656000 -104.315000    5.1385  1.80   
1     2023-11-07T06:05:57.453Z  57.453300 -156.518300   95.3000  1.80   
2     2023-11-07T06:01:38.598Z  60.330900 -152.133300  100.1000  2.10   
3     2023-11-07T05:53:22.960Z  17.978500  -66.908500   12.6500  2.38   
4     2023-11-07T05:52:33.912Z -23.153100  -67.889900  169.0080  4.20   
...                        ...        ...         ...       ...   ...   
4705  2023-10-08T00:39:56.690Z  38.686667 -122.498667    6.2600  1.84   
4706  2023-10-08T00:27:24.440Z  19.127001 -155.393829   31.6800  1.92   
4707  2023-10-08T00:22:55.530Z  19.251167 -155.405334   29.4000  2.10   
4708  2023-10-08T00:19:30.004Z  -5.577000  146.201200   81.5920  4.50   
4709  2023-10-08T00:17:47.340Z  17.913667  -66.923833   14.9800  2.73   

                                        place locationSource  
0                         

In [3]:
# Rounding off the data to to given decimal places in different columns

column_name1 = 'latitude'
column_name2 = 'longitude'
column_name3 = 'depth'
column_name4 = 'mag'
df[column_name1] = df[column_name1].round(3)
df[column_name2] = df[column_name2].round(3)
df[column_name3] = df[column_name3].round(2)
df[column_name4] = df[column_name4].round(2)

In [4]:
# Handling Outliers

lower_bound4 = 0
upper_bound4 = 10

# Identify Outliers
outliers = (df[column_name4] < lower_bound4) | (df[column_name4] > upper_bound4)
# Removing Outliers 
df_no_outliers = df[~outliers]


lower_bound3 = 0
upper_bound3 = 700
outliers = (df[column_name3] < lower_bound3) | (df[column_name3] > upper_bound3)
df_no_outliers = df[~outliers]


In [5]:
# Making a new feature as Zone

# Step 1: Calculate the frequency for each locationSource
frequency_counts = df['locationSource'].value_counts()

# Step 2: Create a new feature 'zone' based on the specified conditions
df['zone'] = df['locationSource'].apply(lambda x: 'red' if frequency_counts[x] > 350 else ('yellow' if 100 <= frequency_counts[x] <= 350 else 'green'))


In [6]:
print(df.dtypes)

time               object
latitude          float64
longitude         float64
depth             float64
mag               float64
place              object
locationSource     object
zone               object
dtype: object


In [7]:
# Reworking the date & time columns

# Convert 'date_and_time' to datetime format
df['time'] = pd.to_datetime(df['time'], format='%Y-%m-%dT%H:%M:%S.%fZ')

# Create a new column 'new_date_and_time' that contains both date and time information
df['new_date_and_time'] = df['time']

# Extract date and time components into new columns
df['date'] = df['new_date_and_time'].dt.date
df['time'] = df['new_date_and_time'].dt.time

# Print the updated DataFrame
print(df)


                 time  latitude  longitude   depth   mag  \
0     06:20:27.627000    31.656   -104.315    5.14  1.80   
1     06:05:57.453000    57.453   -156.518   95.30  1.80   
2     06:01:38.598000    60.331   -152.133  100.10  2.10   
3     05:53:22.960000    17.978    -66.908   12.65  2.38   
4     05:52:33.912000   -23.153    -67.890  169.01  4.20   
...               ...       ...        ...     ...   ...   
4705  00:39:56.690000    38.687   -122.499    6.26  1.84   
4706  00:27:24.440000    19.127   -155.394   31.68  1.92   
4707  00:22:55.530000    19.251   -155.405   29.40  2.10   
4708  00:19:30.004000    -5.577    146.201   81.59  4.50   
4709  00:17:47.340000    17.914    -66.924   14.98  2.73   

                                        place locationSource    zone  \
0                               western Texas             tx     red   
1                  53 km E of Ugashik, Alaska             ak     red   
2               40 km NW of Ninilchik, Alaska             ak   

In [8]:
columns_to_remove = ['new_date_and_time']
df = df.drop(columns=columns_to_remove)

In [9]:
# Get the last column name
last_column = df.columns[-1]

# Move the last column to the first position
df = pd.concat([df[last_column], df.drop(last_column, axis=1)], axis=1)

In [10]:
print (df)


            date             time  latitude  longitude   depth   mag  \
0     2023-11-07  06:20:27.627000    31.656   -104.315    5.14  1.80   
1     2023-11-07  06:05:57.453000    57.453   -156.518   95.30  1.80   
2     2023-11-07  06:01:38.598000    60.331   -152.133  100.10  2.10   
3     2023-11-07  05:53:22.960000    17.978    -66.908   12.65  2.38   
4     2023-11-07  05:52:33.912000   -23.153    -67.890  169.01  4.20   
...          ...              ...       ...        ...     ...   ...   
4705  2023-10-08  00:39:56.690000    38.687   -122.499    6.26  1.84   
4706  2023-10-08  00:27:24.440000    19.127   -155.394   31.68  1.92   
4707  2023-10-08  00:22:55.530000    19.251   -155.405   29.40  2.10   
4708  2023-10-08  00:19:30.004000    -5.577    146.201   81.59  4.50   
4709  2023-10-08  00:17:47.340000    17.914    -66.924   14.98  2.73   

                                        place locationSource    zone  
0                               western Texas             tx    

In [11]:
# Finding the seismic zones for different locations

unique_zones = df[['locationSource', 'zone']].drop_duplicates()

# Print the result
print(unique_zones.to_string(index=False))                                                                                                               

locationSource   zone
            tx    red
            ak    red
            pr yellow
            us    red
            hv    red
            ci yellow
            nc yellow
            uu  green
            nn yellow
            mb  green
            ok  green
            uw  green
            nm  green
            av yellow
            se  green


In [12]:
# Making an interactive World Map UI where user can browse the seismic zones along with their locations on the world map

import folium
from IPython.display import display

# Create a Folium Map centered at a location of your choice
world_map = folium.Map(location=[20, 0], zoom_start=2)  # Adjust the center of the map as needed

# Dictionary of places, their coordinates, and corresponding colors
places_data = {
    'texas': {'coordinates': (31.9686, -99.9018), 'color': 'red'},
    'alaska': {'coordinates': (61.0169, -149.7375), 'color': 'red'},
    'puerto rico': {'coordinates': (18.2208, -66.5901), 'color': 'orange'},
    'uzbekistan': {'coordinates': (41.3775, 64.5853), 'color': 'red'},
    'hawaii': {'coordinates': (20.7967, -156.3319), 'color': 'red'},
    'canada': {'coordinates': (56.1304, -106.3468), 'color': 'orange'},
    'california': {'coordinates': (36.7783, -119.4179), 'color': 'orange'},
    'utah': {'coordinates': (39.3200, -111.0937), 'color': 'green'},
    'south carolina': {'coordinates': (33.8361, -81.1637), 'color': 'orange'},
    'oklahoma': {'coordinates': (35.4676, -97.5164), 'color': 'green'},
    'new mexico': {'coordinates': (34.5199, -105.8701), 'color': 'green'},
    'alabama': {'coordinates': (32.8067, -86.7911), 'color': 'orange'},
    'south carolina_2': {'coordinates': (33.8361, -81.1637), 'color': 'green'},
    'delhi': {'coordinates': (28.6139, 77.2090), 'color': 'orange'},  # Delhi coordinates
}

# Add markers for each place with specified colors and names
for place, data in places_data.items():
    folium.Marker(
        location=data['coordinates'],
        popup=f'<strong>{place}</strong>',
        tooltip=folium.Tooltip(text=place),
        icon=folium.Icon(color=data['color'])
    ).add_to(world_map)

# Display the map inline
display(world_map)


In [13]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

le = LabelEncoder()
df['locationSource'] = le.fit_transform(df['locationSource'])

# Select features and target variables
features = ['mag', 'depth', 'locationSource']
target = ['latitude', 'longitude']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(df[features], df[target], test_size=0.2, random_state=42)

In [20]:
print (df)

            date             time  latitude  longitude   depth   mag  \
0     2023-11-07  06:20:27.627000    31.656   -104.315    5.14  1.80   
1     2023-11-07  06:05:57.453000    57.453   -156.518   95.30  1.80   
2     2023-11-07  06:01:38.598000    60.331   -152.133  100.10  2.10   
3     2023-11-07  05:53:22.960000    17.978    -66.908   12.65  2.38   
4     2023-11-07  05:52:33.912000   -23.153    -67.890  169.01  4.20   
...          ...              ...       ...        ...     ...   ...   
4705  2023-10-08  00:39:56.690000    38.687   -122.499    6.26  1.84   
4706  2023-10-08  00:27:24.440000    19.127   -155.394   31.68  1.92   
4707  2023-10-08  00:22:55.530000    19.251   -155.405   29.40  2.10   
4708  2023-10-08  00:19:30.004000    -5.577    146.201   81.59  4.50   
4709  2023-10-08  00:17:47.340000    17.914    -66.924   14.98  2.73   

                                        place  locationSource    zone  
0                               western Texas              11  

In [18]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Train a linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the test set
predictions = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, predictions)
print(f'Mean Squared Error: {mse}')


Mean Squared Error: 1977.5476162339444


In [None]:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
import matplotlib.pyplot as plt

# Convert timestamp to datetime
data['timestamp'] = pd.to_datetime(data['timestamp'])
data.set_index('timestamp', inplace=True)

# Separate latitude and longitude time series
latitude_series = data['latitude']
longitude_series = data['longitude']

# Fit ARIMA model for latitude
lat_model = ARIMA(latitude_series, order=(1, 1, 1))  # Adjust order as needed
lat_fit = lat_model.fit()

# Fit ARIMA model for longitude
lon_model = ARIMA(longitude_series, order=(1, 1, 1))  # Adjust order as needed
lon_fit = lon_model.fit()

# Forecast future values (adjust steps parameter as needed)
lat_forecast = lat_fit.get_forecast(steps=1)
lon_forecast = lon_fit.get_forecast(steps=1)

# Extract predicted values
predicted_latitude = lat_forecast.predicted_mean.values[0]
predicted_longitude = lon_forecast.predicted_mean.values[0]

print(f'Predicted Latitude: {predicted_latitude:.4f}')
print(f'Predicted Longitude: {predicted_longitude:.4f}')

# Optional: Plot the results
plt.plot(latitude_series.index, latitude_series, label='Actual Latitude')
plt.plot(lat_forecast.predicted_mean.index, lat_forecast.predicted_mean, label='Predicted Latitude', linestyle='--')
plt.legend()
plt.show()

plt.plot(longitude_series.index, longitude_series, label='Actual Longitude')
plt.plot(lon_forecast.predicted_mean.index, lon_forecast.predicted_mean, label='Predicted Longitude', linestyle='--')
plt.legend()
plt.show()