import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# In [None]:
# Read the weather observations from CSV into the working DataFrame.
df = pd.read_csv(filepath_or_buffer='path_to_weather_data.csv')

# Show the first five rows as a quick check of what was loaded.
print(df.head(5))

# In [None]:
# Parse the 'date' column into pandas datetime values so the `.dt`
# accessor and time-axis plotting work on it.
df['date'] = pd.to_datetime(df['date'])

# Forward-fill missing values: each gap takes the last preceding
# observation in row order. `DataFrame.ffill` replaces the deprecated
# `fillna(method='ffill')` spelling.
# NOTE: values missing at the very top of a column have nothing to copy
# from and stay NaN after this step.
df.ffill(inplace=True)

# Convert Fahrenheit to Celsius: C = (F - 32) * 5/9
df['temperature_C'] = (df['temperature_F'] - 32) * (5.0 / 9.0)

# In [None]:
# Line chart of the Celsius temperature series against the date column.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df['date'], df['temperature_C'], label='Temperature')
ax.set_title('Daily Temperature Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Temperature (°C)')
ax.legend()
plt.show()

# Placeholder: plots for other factors (precipitation, humidity, etc.)
# would go here.


# In [None]:
# Seasonal analysis: average every numeric column per calendar month.
df['month'] = df['date'].dt.month

# `numeric_only=True` restricts the aggregation to numeric columns.
# Without it, pandas >= 2.0 raises TypeError as soon as the frame holds a
# non-numeric column (e.g. a text station name or weather description).
monthly_avg = df.groupby('month').mean(numeric_only=True)

# One bar per month present in the data, labelled by month number.
plt.bar(monthly_avg.index, monthly_avg['temperature_C'])
plt.title('Average Monthly Temperature')
plt.xlabel('Month')
plt.ylabel('Average Temperature (°C)')
plt.xticks(monthly_avg.index)
plt.show()


# In [None]:
# Predict a day's temperature from the previous day's temperature.
# NOTE(review): assumes df rows are in chronological order with one row per
# day, so that "previous row" means "previous day" - confirm for this data.

# shift(1) moves each value down one row, so row t holds the temperature of
# row t-1. (shift(-1) would instead pull the *next* row's value upward.)
previous_day = df[['temperature_C']].shift(1)

# The shift leaves NaN in the first row, and temperature_C itself may still
# contain NaN; LinearRegression rejects NaN, so keep only complete pairs.
complete_rows = previous_day['temperature_C'].notna() & df['temperature_C'].notna()

X = previous_day[complete_rows]  # features are previous day's temperatures
y = df.loc[complete_rows, 'temperature_C']  # target is today's temperature

# Split data: 80% train / 20% test, shuffled with a fixed seed so the split
# is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict temperatures for the held-out rows
predicted_temps = model.predict(X_test)