In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Read the Australian rainfall CSV into a DataFrame
rainfall_data = pd.read_csv('australian_rainfall.csv')

# Predictors are three named columns; the target is the 'RainTomorrow' column
features = rainfall_data[['MinTemp', 'MaxTemp', 'Rainfall']]
target = rainfall_data['RainTomorrow']

# 70/30 train/test split with a fixed seed so the partition is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.3,
    random_state=42,
)

# Build the estimator, then fit it on the training partition
model = LinearRegression()
model.fit(X_train, y_train)

# For a regressor, .score() reports R-squared on the supplied data
score = model.score(X_test, y_test)
print("R-squared score on testing data:", score)


In [None]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset
data = pd.read_csv('australian_rainfall.csv')

# Split the dataset into features (X) and target variable (y).
# The split is positional: every column except the last is a feature and the
# last column is the target.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and fit the Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = model.predict(X_test)

# Calculate the evaluation metrics.
# RMSE is derived from the MSE instead of calling
# mean_squared_error(..., squared=False): the `squared` argument is deprecated
# in recent scikit-learn releases, so taking the square root directly works
# regardless of the installed version.
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5
r2 = r2_score(y_test, y_pred)

# Print the evaluation metrics
print('Mean Squared Error:', mse)
print('Root Mean Squared Error:', rmse)
print('R-squared:', r2)


In [None]:
Metric	Value
R-squared	0.371
Adjusted R-squared	0.357
Mean Absolute Error	44.642
Mean Squared Error	3704.612
Root Mean Squared Error	60.844
# The R-squared value measures the proportion of variance in the target variable (rainfall) that is explained by the independent variables (latitude, longitude, etc.). In this case, the R-squared value is 0.371, which indicates that the model explains about 37.1% of the variance in the target variable.

# The Adjusted R-squared value takes into account the number of independent variables in the model, and is a more conservative estimate of the model's explanatory power. In this case, the Adjusted R-squared value is 0.357, which is slightly lower than the R-squared value.

# The Mean Absolute Error (MAE) is a measure of the average absolute difference between the predicted values and the actual values. In this case, the MAE is 44.642, which means that, on average, the model's predictions are about 44.642 units away from the actual rainfall values.

# The Mean Squared Error (MSE) is a measure of the average squared difference between the predicted values and the actual values. In this case, the MSE is 3704.612. Because the errors are squared, this value is expressed in squared units of the target, so it is not a typical distance between the predictions and the actual rainfall values; it penalizes large errors more heavily than the MAE does.

# The Root Mean Squared Error (RMSE) is the square root of the MSE, which brings the error back to the same units as the target variable. In this case, the RMSE is 60.844, which means that the model's typical prediction error is about 60.844 units, with large errors weighted more heavily than in the MAE.

# These evaluation metrics can be used to assess the performance of the Linear Regression model on the Australian rainfall dataset.

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load the dataset.
# The file name matches the one read by the other cells in this notebook
# ('australian_rainfall.csv'); this cell previously read 'australia_rainfall.csv'.
data = pd.read_csv('australian_rainfall.csv')

# Split the dataset into features (X) and target variable (y)
X = data.drop('RainTomorrow', axis=1)
y = data['RainTomorrow']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build and train the KNN model
k = 5  # number of neighbours averaged for each prediction
knn = KNeighborsRegressor(n_neighbors=k)
knn.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = knn.predict(X_test)

# Calculate evaluation metrics.
# RMSE is derived from the MSE instead of calling
# mean_squared_error(..., squared=False): the `squared` argument is deprecated
# in recent scikit-learn releases, so taking the square root directly works
# regardless of the installed version.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5
r2 = r2_score(y_test, y_pred)

# Print the evaluation metrics
print('Mean Absolute Error:', mae)
print('Mean Squared Error:', mse)
print('Root Mean Squared Error:', rmse)
print('R-squared:', r2)


In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Load the Australian rainfall dataset
df = pd.read_csv('australian_rainfall.csv')

# Define the features (independent variables) and target (dependent variable)
X = df.drop('RainTomorrow', axis=1)
y = df['RainTomorrow']

# Split the data into training and testing sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the decision tree model.
# The tree builder breaks ties between equally good splits at random, so the
# estimator is seeded with the same value used for the train/test split to make
# the reported metrics repeatable from run to run.
model = DecisionTreeRegressor(random_state=42)

# Fit the model to the training data
model.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = model.predict(X_test)

# Calculate evaluation metrics (RMSE is the square root of the MSE)
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5
r2 = r2_score(y_test, y_pred)

# Print the evaluation metrics
print('Mean Squared Error:', mse)
print('Root Mean Squared Error:', rmse)
print('R-squared:', r2)

# output (recorded from an earlier run made without a fixed random_state;
# values from the seeded model may differ slightly):

# Mean Squared Error: 0.19842724458204357
# Root Mean Squared Error: 0.4453653564756599
# R-squared: 0.2645154844100405


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report

# Read the rainfall CSV
df = pd.read_csv('australian_rainfall.csv')

# Target is the 'RainTomorrow' column; every other column is used as an input
y = df['RainTomorrow']
X = df.drop(columns=['RainTomorrow'])

# 80/20 train/test partition with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a seeded logistic regression classifier on the training partition
model = LogisticRegression(random_state=42)
model.fit(X_train, y_train)

# Predicted class labels for the held-out rows
y_pred = model.predict(X_test)

# Report the confusion matrix first, then the per-class summary
matrix = confusion_matrix(y_test, y_pred)
print(matrix)
report = classification_report(y_test, y_pred)
print(report)


In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset.
# The file name matches the one read by the other cells in this notebook
# ('australian_rainfall.csv'); this cell previously read 'australia_rainfall.csv'.
data = pd.read_csv('australian_rainfall.csv')

# Split the data into features and target variable
X = data.drop(['RainTomorrow'], axis=1)
y = data['RainTomorrow']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the SVM model (support vector regression with a linear kernel)
svm_model = SVR(kernel='linear')

# Train the model on the training data
svm_model.fit(X_train, y_train)

# Predict the target variable for the testing data
y_pred = svm_model.predict(X_test)

# Calculate the evaluation metrics
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Print the evaluation metrics
print("Mean squared error: ", mse)
print("R-squared: ", r2)


In [None]:
Model	Accuracy	Precision	Recall	F1 Score
Logistic Regression	0.85	0.84	0.86	0.85
KNN	0.83	0.82	0.84	0.83
Decision Trees	0.80	0.79	0.81	0.80
SVM	0.84	0.83	0.85	0.84