# Reduce Dimensionality using Mean and PCA

In [1]:
import pandas as pd
from sklearn.decomposition import PCA

# Location of the wind dataset (a Kaggle input path; adjust it to wherever your copy lives)
file_path = '/kaggle/input/wind-nuts-data/wind_nuts2 (1).csv'

# Read the CSV, parse the 'time' column into datetimes and promote it to the index
wind_data = pd.read_csv(file_path)
wind_data = wind_data.assign(time=pd.to_datetime(wind_data['time'])).set_index('time')

# Collapse the series to one row per month holding the mean of every column.
# NOTE(review): newer pandas releases appear to deprecate the 'M' alias in
# favour of 'ME' — confirm against the installed version before changing it.
monthly_wind_data = wind_data.resample('M').mean()

# Function to reduce dimensionality using PCA
def reduce_dimensionality(data, n_components):
    """
    Project the rows of ``data`` onto its leading principal components.

    A new PCA model is fitted on ``data`` at every call, so the results of
    separate calls do not share any fitted state.

    Parameters:
        data (pd.DataFrame): The input data to reduce. Its index is reused
            for the returned frame.
        n_components (int | float | str | None): Passed unchanged to
            ``PCA(n_components=...)``. An integer requests exactly that many
            components; the other forms scikit-learn accepts (a variance
            fraction, ``'mle'`` or ``None``) let PCA choose the count.

    Returns:
        pd.DataFrame: One column per retained component, labelled ``PC1``,
        ``PC2``, ... and indexed like ``data``.
    """
    pca = PCA(n_components=n_components)
    scores = pca.fit_transform(data)
    # Label the columns from the width of the transformed array instead of
    # from ``n_components``: the two agree for an integer request, but
    # ``range(n_components)`` would fail when the argument is a float,
    # ``'mle'`` or ``None`` and PCA picks the number of components itself.
    columns = [f'PC{i}' for i in range(1, scores.shape[1] + 1)]
    return pd.DataFrame(scores, columns=columns, index=data.index)

# Project the monthly means onto a single principal component
reduced_data_1d = reduce_dimensionality(monthly_wind_data, n_components=1)

# Project the same monthly means onto four principal components
reduced_data_4d = reduce_dimensionality(monthly_wind_data, n_components=4)

# Persist both projections as CSV files in the working directory
reduced_data_1d.to_csv('reduced_data_1d.csv')
reduced_data_4d.to_csv('reduced_data_4d.csv')

# Show the first rows of each projection (title and table on separate lines)
print("Reduced data (1D):", reduced_data_1d.head(), sep="\n")
print("\nReduced data (4D):", reduced_data_4d.head(), sep="\n")


Reduced data (1D):
                 PC1
time                
1980-01-31 -0.091103
1980-02-29 -0.216146
1980-03-31  0.071958
1980-04-30  0.015513
1980-05-31 -0.119835

Reduced data (4D):
                 PC1       PC2       PC3       PC4
time                                              
1980-01-31 -0.091103 -0.053056 -0.035989 -0.004181
1980-02-29 -0.216146 -0.037715  0.005181 -0.025659
1980-03-31  0.071958  0.041418  0.018069  0.001663
1980-04-30  0.015513  0.005105 -0.007333  0.022936
1980-05-31 -0.119835 -0.047358 -0.006206  0.028172


# Reduce Dimensionality using Seasonality and PCA

In [1]:
import pandas as pd
from sklearn.decomposition import PCA
from statsmodels.tsa.seasonal import seasonal_decompose

# Location of the wind dataset (a Kaggle input path)
file_path = '/kaggle/input/wind-nuts-data/wind_nuts2 (1).csv'

# Read the CSV, parse the 'time' column into datetimes and promote it to the index
wind_data = pd.read_csv(file_path)
wind_data = wind_data.assign(time=pd.to_datetime(wind_data['time'])).set_index('time')

# Step 1: Collapse the series to one row per month holding the mean of every column.
# NOTE(review): newer pandas releases appear to deprecate the 'M' alias in
# favour of 'ME' — confirm against the installed version before changing it.
monthly_wind_data = wind_data.resample('M').mean()

# Function to decompose time series and extract seasonal component
def extract_seasonality(data, period):
    """
    Return the additive seasonal component of every column in ``data``.

    Each column is decomposed independently with statsmodels'
    ``seasonal_decompose`` (additive model) and only the ``seasonal`` part of
    the result is kept.

    Parameters:
        data (pd.DataFrame): Time series data with a DateTime index, one
            series per column.
        period (int): The period of seasonality (e.g., 12 for yearly
            seasonality in monthly data).

    Returns:
        pd.DataFrame: Seasonal components, with the same column names, column
        order and index as ``data``.
    """
    # Decompose every column first and build the frame in a single step.
    # Inserting columns one at a time into an initially empty DataFrame can
    # fragment its internal storage (pandas emits a PerformanceWarning for
    # wide frames); a dict keeps the original column order without that cost.
    seasonal_by_column = {
        col: seasonal_decompose(
            data[col],
            model='additive',
            period=period,
            # Ask statsmodels to extrapolate the trend at both ends of the
            # series (see its documentation for the exact 'freq' semantics).
            extrapolate_trend='freq',
        ).seasonal
        for col in data.columns
    }
    return pd.DataFrame(seasonal_by_column, index=data.index)

# Step 2: Pull out the seasonal component of every column of the monthly data
period = 12  # twelve monthly observations make up one yearly cycle
seasonal_data = extract_seasonality(monthly_wind_data, period)

# Step 3: Perform PCA for dimensionality reduction
def reduce_dimensionality(data, n_components):
    """
    Project the rows of ``data`` onto its leading principal components.

    A new PCA model is fitted on ``data`` at every call, so the results of
    separate calls do not share any fitted state.

    Parameters:
        data (pd.DataFrame): The input data to reduce. Its index is reused
            for the returned frame.
        n_components (int | float | str | None): Passed unchanged to
            ``PCA(n_components=...)``. An integer requests exactly that many
            components; the other forms scikit-learn accepts (a variance
            fraction, ``'mle'`` or ``None``) let PCA choose the count.

    Returns:
        pd.DataFrame: One column per retained component, labelled ``PC1``,
        ``PC2``, ... and indexed like ``data``.
    """
    pca = PCA(n_components=n_components)
    scores = pca.fit_transform(data)
    # Label the columns from the width of the transformed array instead of
    # from ``n_components``: the two agree for an integer request, but
    # ``range(n_components)`` would fail when the argument is a float,
    # ``'mle'`` or ``None`` and PCA picks the number of components itself.
    columns = [f'PC{i}' for i in range(1, scores.shape[1] + 1)]
    return pd.DataFrame(scores, columns=columns, index=data.index)

# Step 4: Project the seasonal components onto a single principal component
reduced_seasonal_1d = reduce_dimensionality(seasonal_data, n_components=1)

# Step 5: Project the same seasonal components onto four principal components
reduced_seasonal_4d = reduce_dimensionality(seasonal_data, n_components=4)

# Persist both projections as CSV files in the working directory
reduced_seasonal_1d.to_csv('reduced_seasonal_1d_monthly.csv')
reduced_seasonal_4d.to_csv('reduced_seasonal_4d_monthly.csv')

# Show the first rows of each projection (title and table on separate lines)
print("Reduced seasonal data (1D, monthly):", reduced_seasonal_1d.head(), sep="\n")
print("\nReduced seasonal data (4D, monthly):", reduced_seasonal_4d.head(), sep="\n")


Reduced seasonal data (1D, monthly):
                 PC1
time                
1980-01-31  0.327430
1980-02-29  0.199927
1980-03-31  0.109160
1980-04-30 -0.117771
1980-05-31 -0.172712

Reduced seasonal data (4D, monthly):
                 PC1       PC2       PC3       PC4
time                                              
1980-01-31  0.327430 -0.012249 -0.002050 -0.001261
1980-02-29  0.199927 -0.010180 -0.001167  0.000954
1980-03-31  0.109160 -0.016742  0.008015  0.000209
1980-04-30 -0.117771 -0.007416 -0.004004  0.002038
1980-05-31 -0.172712  0.002191 -0.003529  0.004993
