In [5]:
######## Reading MetOp #######
import os
import math
import netCDF4 as nc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
def calculate_mean_values(file_path):
    netcdf_file = nc.Dataset(file_path, 'r')
    press_array = netcdf_file.variables['press'][:]
    specific_humidity = netcdf_file.variables['shum'][:]
    temp = netcdf_file.variables['temp'][:]  # Corrected variable name
    shum1 = specific_humidity / 1000
    ev = (shum1 * press_array) / (0.662 + 0.378 * shum1)  # Corrected calculation
    Tc = temp - 273.15
    es = 6.122 * np.exp((17.67 * Tc) / (Tc + 243.5))
    rh = 100 * (ev / es)
    ho = 7000
    p0 = 1013.15
    heights = []
    mean_values = []
    for press_value in press_array:
        height = ho * np.log(p0 / press_value)
        heights.append(height / 1000)
    for i in np.arange(0.5, 30.5, 1):
        low_num = i - 0.5
        high_num = i + 0.5
        m = (heights >= low_num) & (heights <= high_num)
        selected_temp_data = rh[m]  # Changed to use relative humidity
        selected_data = np.mean(selected_temp_data)
        mean_values.append(np.round(selected_data, 4))
    return mean_values

##plt.figure(figsize=(10, 6))
##plt.plot(mean_values, np.arange(0.5, 30.5, 1), marker='o', linestyle='-')
##plt.xlabel('Rh')
##plt.ylabel('Height')
##plt.show()
##print(mean_values)
root_directory =  "C:\\Users\\javva\\Desktop\\2022"
all_dfs = []
for folder_name in os.listdir(root_directory):
    folder_path = os.path.join(root_directory, folder_name)
    if os.path.isdir(folder_path):
        result_data = []
        lats = []
        longs = []
        years = []
        months = []
        days = []
        Time = []

        for file_name in os.listdir(folder_path):
            if file_name.endswith('.nc'):
                current_file_path = os.path.join(folder_path, file_name)
                netcdf_file = nc.Dataset(current_file_path, 'r')
                variable_data_lat = netcdf_file.variables['lat'][:]
                variable_data_lon = netcdf_file.variables['lon'][:]
                date_str = file_name.split("_")[1]
                year = int(date_str[:4])
                day = int(date_str[4:6])
                month = int(date_str[6:8])
                time_str = file_name.split("_")[2]
                hour = int(time_str[:2])
                minutes = int(time_str[2:4])
                seconds = int(time_str[4:6])
                hours = round(hour + minutes / 60 + seconds / 3600, 2)
                years.append(year)
                days.append(day)
                months.append(month)
                Time.append(hours)

                mean_values = calculate_mean_values(current_file_path)

                if mean_values is not None and len(mean_values) == 30:
                    mean_values = ['NAN' if math.isnan(val) else val for val in mean_values]
                    result_data.append(mean_values)
                else:
                    result_data.append(['NAN'] * 30)  # Corrected number of elements

                rounded_lat = [round(lat, 2) for lat in variable_data_lat]
                rounded_lon = [round(lon, 2) for lon in variable_data_lon]
                lats.append(', '.join(map(lambda x: "{:.2f}".format(x), rounded_lat)))
                longs.append(', '.join(map(lambda x: "{:.2f}".format(x), rounded_lon)))

        data = {
            'latitudes': lats,
            'longitudes': longs,
            'Year': years,
            'Month': months,
            'Day': days,
            'Time': Time,
        }

        for i in range(30):
            data[f'hum_{i+1}'] = [item[i] for item in result_data]

        df = pd.DataFrame(data)
        df = df.replace('nan', 'NAN')

        all_dfs.append(df)

final_df = pd.concat(all_dfs, axis=0, ignore_index=True)
output_file_name = "2021_senti_hum.csv"
final_df.to_csv(output_file_name, index=False)

print(f"Data saved to {output_file_name}")

  mean_values = ['NAN' if math.isnan(val) else val for val in mean_values]


Data saved to 2021_senti_hum.csv


In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load your CSV data
df = pd.read_csv("2021_senti_hum.csv")
all_lats_1=[]
all_lons_1=[]
lats=df['latitudes']
lons=df['longitudes']
all_lats_1.extend(lats)
all_lons_1.extend(lons)
# Define grid parameters
dln = 2.5
dln_bound = dln / 2
req_lon = np.arange(-180, 180, dln)
req_lat = np.arange(-90, 90, dln)

# Loop through humidity columns (hum_1 to hum_30)
for hum_col in range(1, 31):
    # Initialize arrays to store aggregated humidity values
    g1_values = np.empty((len(req_lon), len(req_lat)))

    # Loop through grid cells
    for i, lon_center in enumerate(req_lon):
        for j, lat_center in enumerate(req_lat):
            # Define bounds for the current grid cell
            lon_min = lon_center - dln_bound
            lon_max = lon_center + dln_bound
            lat_min = lat_center - dln_bound
            lat_max = lat_center + dln_bound

            # Filter data within the current grid cell
            filtered_df = df[(df['longitudes'] >= lon_min) &
                             (df['longitudes'] <= lon_max) &
                             (df['latitudes'] >= lat_min) &
                             (df['latitudes'] <= lat_max)]
            valid_humidity_values = pd.to_numeric(filtered_df['hum_' + str(hum_col)], errors='coerce')
            g1_values[i, j] = np.nanmean(valid_humidity_values)

    # Create a DataFrame from the aggregated humidity values
    g1_values_df = pd.DataFrame(g1_values.T, columns=req_lon, index=req_lat)

    # Save the DataFrame to a CSV file with a unique name for each column
    filename = f"hum_{hum_col}.csv"
    g1_values_df.to_csv(filename)
    lon_grid, lat_grid = np.meshgrid(req_lon, req_lat)
    # Create a custom IDW interpolation function
    def idw_interpolate(x, y, values, xi, yi, power=2):
        dist = np.sqrt((x - xi)**2 + (y - yi)**2)
        weights = 1.0 / (dist**power)
        weights /= np.sum(weights)
        result = np.sum(values * weights)
        return result

    # Perform IDW interpolation for the current humidity column
    g1_values = g1_values.T  
    idw_values = np.empty(lon_grid.shape)
    for i in range(lon_grid.shape[0]):
        for j in range(lon_grid.shape[1]):
            lon_val = lon_grid[i, j]
            lat_val = lat_grid[i, j]
            g1_val = g1_values[i, j]
            if not np.isnan(g1_val):
                interpolated_value = idw_interpolate(all_lats_1, all_lons_1, g1_val, lon_val, lat_val)
                idw_values[i, j] = interpolated_value
            else:
                idw_values[i, j] = None

    idw_df = pd.DataFrame(idw_values, columns=req_lon, index=req_lat)

    # Save the IDW output to a separate file for each humidity column
    idw_filename = f"idw2_values_{hum_col}.csv"
    idw_df.to_csv(idw_filename)

    # Plot the IDW output for the current humidity column
    plt.figure(figsize=(12, 6))
    plt.imshow(idw_df, cmap='viridis', extent=[-180, 180, -90, 90], aspect='auto')
    plt.colorbar(label=f'hum_{hum_col} %')
    plt.clim(0, 100)
    plt.xlabel('Longitude')
    plt.ylabel('Latitude')
    plt.title(f'Map for hum_{hum_col} 10 days')
    plt.savefig(f'idw_plot_{hum_col}.png')
    plt.close() # Close the current plot to avoid overlapping

  g1_values[i, j] = np.nanmean(valid_humidity_values)


In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load your CSV data
df = pd.read_csv("2021_senti_hum.csv")
all_lats_1=[]
all_lons_1=[]
lats=df['latitudes']
lons=df['longitudes']
all_lats_1.extend(lats)
all_lons_1.extend(lons)
# Define grid parameters
dln = 2.5
dln_bound = dln / 2
req_lon = np.arange(-180, 180, dln)
req_lat = np.arange(-90, 90, dln)

# Loop through humidity columns (hum_1 to hum_30)
for hum_col in range(1, 31):
    # Initialize arrays to store aggregated humidity values
    g1_values = np.empty((len(req_lon), len(req_lat)))

    # Loop through grid cells
    for i, lon_center in enumerate(req_lon):
        for j, lat_center in enumerate(req_lat):
            # Define bounds for the current grid cell
            lon_min = lon_center - dln_bound
            lon_max = lon_center + dln_bound
            lat_min = lat_center - dln_bound
            lat_max = lat_center + dln_bound

            # Filter data within the current grid cell
            filtered_df = df[(df['longitudes'] >= lon_min) &
                             (df['longitudes'] <= lon_max) &
                             (df['latitudes'] >= lat_min) &
                             (df['latitudes'] <= lat_max)]
            valid_humidity_values = pd.to_numeric(filtered_df['hum_' + str(hum_col)], errors='coerce')
            g1_values[i, j] = np.nanmean(valid_humidity_values)

    # Create a DataFrame from the aggregated humidity values
    g1_values_df = pd.DataFrame(g1_values.T, columns=req_lon, index=req_lat)

    # Save the DataFrame to a CSV file with a unique name for each column
    filename = f"hum_{hum_col}.csv"
    g1_values_df.to_csv(filename)
    lon_grid, lat_grid = np.meshgrid(req_lon, req_lat)
    # Create a custom IDW interpolation function
    def idw_interpolate(x, y, values, xi, yi, power=2):
        dist = np.sqrt((x - xi)**2 + (y - yi)**2)
        weights = 1.0 / (dist**power)
        weights /= np.sum(weights)
        result = np.sum(values * weights)
        return result

    # Perform IDW interpolation for the current humidity column
    g1_values = g1_values.T  
    idw_values = np.empty(lon_grid.shape)
    for i in range(lon_grid.shape[0]):
        for j in range(lon_grid.shape[1]):
            lon_val = lon_grid[i, j]
            lat_val = lat_grid[i, j]
            g1_val = g1_values[i, j]
            if not np.isnan(g1_val):
                interpolated_value = idw_interpolate(all_lats_1, all_lons_1, g1_val, lon_val, lat_val)
                idw_values[i, j] = interpolated_value
            else:
                idw_values[i, j] = interpolated_value

    idw_df = pd.DataFrame(idw_values, columns=req_lon, index=req_lat)

    # Save the IDW output to a separate file for each humidity column
    idw_filename = f"idw2_values_{hum_col}.csv"
    idw_df.to_csv(idw_filename)

    # Plot the IDW output for the current humidity column
    plt.figure(figsize=(12, 6))
    plt.imshow(idw_df, cmap='viridis', extent=[-180, 180, -90, 90], aspect='auto')
    plt.colorbar(label=f'hum_{hum_col} %')
    plt.clim(0, 100)
    plt.xlabel('Longitude')
    plt.ylabel('Latitude')
    plt.title(f'Map for hum_{hum_col} 10 days')
    plt.savefig(f'idw_plot_{hum_col}.png')
    plt.close() # Close the current plot to avoid overlapping

  g1_values[i, j] = np.nanmean(valid_humidity_values)


In [None]:
import numpy as np
import netCDF4 as nc
import pandas as pd

# Define the target dimensions
target_rows = 72
target_cols = 144

# Create NetCDF file
with nc.Dataset('D:\\new_combined0000_file.nc', 'w') as new_combined_dataset:
    new_combined_dataset.createDimension('longitude', target_cols)
    new_combined_dataset.createDimension('latitude', target_rows)

    longitude_var = new_combined_dataset.createVariable('longitude', 'f4', ('longitude',))
    latitude_var = new_combined_dataset.createVariable('latitude', 'f4', ('latitude',))

    # Set units for latitude and longitude
    latitude_var.units = 'degrees_north'
    longitude_var.units = 'degrees_east'

    all_latitudes = []
    all_longitudes = []

    # Initialize a list to store specific humidity variables
    specific_humidity_vars = []

    for i in range(1, 21):
        file_name = f'idw1_values_{i}.csv'
        csv_data = pd.read_csv(file_name, header=None)
        data = csv_data.values
        data = data.astype(float)

        # Check if the data shape matches the target dimensions, if not, resize
        if data.shape != (target_rows, target_cols):
            data = data[:target_rows, :target_cols]

        # Set specific humidity data
        variable_name = f'specific_humidity_{i}'
        data_var = new_combined_dataset.createVariable(variable_name, 'f4', ('latitude', 'longitude'))
        data_var[:] = data

        # Set units for specific humidity
        data_var.units = 'percentage'

        # Collect latitude and longitude data from the first file
        if i == 1:
            all_longitudes = data[0, :]
            all_latitudes = data[:, 0]

        # Append the specific humidity variable to the list
        specific_humidity_vars.append(data_var)

    # Set the common longitude and latitude data for the combined file
    longitude_var[:] = all_longitudes
    latitude_var[:] = all_latitudes

print("Data from all CSV files has been successfully imported into the NetCDF4 file.")
from netCDF4 import Dataset
ncfile=Dataset('D:\\new_combined0000_file.nc','r')
lat_data=ncfile.variables['latitude'][:]
lon_data=ncfile.variables['longitude'][:]
hum_data=ncfile.variables['specific_humidity_8']
hum_data