In [None]:
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import cross_val_score, KFold
import numpy as np

# Load the dataset
data = pd.read_csv('N2O.csv')

# Predictor and target column names, declared once so the feature matrix and
# the printed equation cannot drift apart.
predictors = ['DOC', 'WNO3-', 'WTN', 'WT', 'WNH4+']
target = 'N2O'

# Preprocessing: keep only the modelling columns and drop rows with any
# missing value among them.
columns_to_use = ['DOC', 'WNO3-', 'N2O', 'WTN', 'WT','WNH4+']
data_cleaned = data[columns_to_use].dropna()

# Select predictors and target
X = data_cleaned[predictors]
y = data_cleaned[target]

# Generate polynomial features (degree=3, matching the equations in the screenshot).
# include_bias=False because Ridge fits its own intercept.
poly = PolynomialFeatures(degree=3, include_bias=False)
X_poly = poly.fit_transform(X)

# Ridge regression; alpha controls the regularization strength.
ridge_model = Ridge(alpha=1.0)

# Shuffled 5-fold cross-validation with a fixed seed for reproducibility.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Out-of-sample R^2 for each fold. cross_val_score fits clones of the
# estimator, so ridge_model itself is still unfitted after this call.
cv_scores = cross_val_score(ridge_model, X_poly, y, cv=kf, scoring='r2')

# Fit on the entire dataset to obtain the final coefficients.
ridge_model.fit(X_poly, y)
coefficients = ridge_model.coef_
intercept = ridge_model.intercept_

# In-sample fit quality: the model is scored on the same rows it was trained
# on, so these numbers are optimistic compared with the cross-validated R^2.
y_pred = ridge_model.predict(X_poly)
r2 = r2_score(y, y_pred)
mse = mean_squared_error(y, y_pred)

# Print the model equation, one "+ (coef * term)" entry per polynomial term.
feature_names = poly.get_feature_names_out(predictors)
print("Model Equation:")
equation = f"N2O = {intercept:.4f} " + "".join(
    f"+ ({coef:.4f} * {name}) " for coef, name in zip(coefficients, feature_names)
)
print(equation)

# Print evaluation metrics
print(f"R^2: {r2:.4f}")
print(f"MSE: {mse:.4f}")
# Report the cross-validation result, which was previously computed but never shown.
print(f"Cross-validated R^2 ({kf.get_n_splits()}-fold): "
      f"{cv_scores.mean():.4f} +/- {cv_scores.std():.4f}")


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score
from sklearn.preprocessing import PolynomialFeatures
import pandas as pd
from sklearn.model_selection import KFold, cross_val_score
import scipy.stats as stats

# Load the dataset
data = pd.read_csv('N2O.csv')

# Preprocessing: keep only the modelling columns and drop rows with any
# missing value among them.
columns_to_use = ['DOC', 'WNO3-', 'N2O', 'WTN', 'WT','WNH4+']
data_cleaned = data[columns_to_use].dropna()

# Select predictors and target
X = data_cleaned[['DOC', 'WNO3-', 'WTN', 'WT','WNH4+']]
y = data_cleaned['N2O']

# Generate polynomial features (degree=3); Ridge fits its own intercept,
# so no bias column is added.
poly = PolynomialFeatures(degree=3, include_bias=False)
X_poly = poly.fit_transform(X)

# Ridge regression; alpha controls the regularization strength.
ridge_model = Ridge(alpha=1.0)

# Shuffled 5-fold cross-validation with a fixed seed for reproducibility.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Out-of-sample R^2 per fold (fits clones; ridge_model stays unfitted here).
cv_scores = cross_val_score(ridge_model, X_poly, y, cv=kf, scoring='r2')
print(f"Cross-validated R^2: {cv_scores.mean():.4f} +/- {cv_scores.std():.4f}")

# Train the model on the entire dataset for final evaluation
ridge_model.fit(X_poly, y)

# In-sample predictions and R^2 (scored on the training rows themselves).
y_pred = ridge_model.predict(X_poly)
r2 = r2_score(y, y_pred)
r2_rounded = round(r2, 2)

# Overall F-test of the regression.
# NOTE(review): this is the ordinary-least-squares F formula; with a ridge
# penalty the effective degrees of freedom are below p, so treat the p-value
# as approximate.
n = len(y)               # Sample size
p = X_poly.shape[1]      # Number of predictors (including polynomial terms)
df_resid = n - p - 1     # Residual degrees of freedom

rss = np.sum((y - y_pred) ** 2)        # Residual sum of squares
tss = np.sum((y - np.mean(y)) ** 2)    # Total sum of squares

if df_resid > 0:
    f_stat = ((tss - rss) / p) / (rss / df_resid)
    # Survival function = 1 - cdf, but accurate for very small p-values.
    p_value = stats.f.sf(f_stat, p, df_resid)
    if p_value < 0.001:
        p_value_text = 'P-value < 0.001'
    else:
        p_value_text = f'P-value = {p_value:.4f}'
else:
    # More polynomial terms than observations allow: the F-test is undefined.
    f_stat = np.nan
    p_value = np.nan
    p_value_text = 'P-value = n/a'

# Scatter of actual vs predicted values.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y, y_pred, label='Actual vs Predicted', color='blue', alpha=0.6)

# Reference line for perfect predictions (predicted = actual).
y_lo, y_hi = y.min(), y.max()
ax.plot([y_lo, y_hi], [y_lo, y_hi], color='red', linestyle='--', label='Perfect Prediction')

# Display the R² value and p-value on the plot (the p-value text was
# previously built but never drawn). R² is always shown with two decimals.
ax.text(0.1, 0.9, f'R² = {r2_rounded:.2f}\n{p_value_text}',
        transform=ax.transAxes, fontsize=14, verticalalignment='top')

# Labels and title
ax.set_xlabel('Actual N2O')
ax.set_ylabel('Predicted N2O')
ax.set_title('Ridge Polynomial Regression: Actual vs Predicted N2O')

# Add a legend
ax.legend()

# Show the plot
plt.show()

In [None]:
import pandas as pd
import folium

# Load the CSV data
file_path = 'N2O.csv'  # Path to CSV file
data = pd.read_csv(file_path)

# Parse the coordinates once and keep the parsed values: anything that is not
# a number becomes NaN and is then dropped together with the originally
# missing entries. Storing the result (rather than using it only as a filter)
# guarantees LAT/LONG are numeric downstream.
data_cleaned = data.assign(
    LAT=pd.to_numeric(data['LAT'], errors='coerce'),
    LONG=pd.to_numeric(data['LONG'], errors='coerce'),
).dropna(subset=['LAT', 'LONG'])

# Extract cleaned LAT and LONG values
latitude_clean = data_cleaned['LAT']
longitude_clean = data_cleaned['LONG']

# Initialize the map centered on China
china_map = folium.Map(location=[35.8617, 104.1954], zoom_start=5)

# Add each sampling point as a small red dot
for lat, lon in zip(latitude_clean, longitude_clean):
    folium.CircleMarker(
        location=[float(lat), float(lon)],
        radius=2,  # Smaller dot
        color='red',
        fill=True,
        fill_color='red',
        fill_opacity=0.8
    ).add_to(china_map)

# Save the map to HTML and display it as the cell output
china_map.save('china_n2o.html')
china_map


In [None]:
#Correlation Graph for N2O
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Read the measurements from disk
df = pd.read_csv("N2O.csv")

# Variables that take part in the correlation analysis
columns = [
    'N2O', 'WT', 'WTN', 'WNO3-', 'WNH4+', 'DOC',
    'DO', 'ST', 'STN', 'SNO3-', 'SCN',
]
df_selected = df[columns]

# Pairwise Pearson correlation coefficients between the selected variables
correlation_matrix = df_selected.corr(method='pearson')

# Text version of the correlation table
print(correlation_matrix)

# Annotated heatmap of the same matrix, drawn on an explicitly created Axes
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap="coolwarm",
    fmt=".2f",
    linewidths=0.5,
    ax=ax,
)
ax.set_title("Pearson Correlation Coefficient Matrix")
plt.show()
