# This is how we get and organize the data

# Get NC4 data

In [1]:
import pandas as pd 
import datetime as dt 
import numpy as np
import xarray as xr
import os
import requests
import glob


#Display all columns
pd.set_option('display.max_columns', None)

# Open 1 Nc4 file 

In [8]:
# Open the NetCDF file
ds = xr.open_dataset(r"C:\Users\LENOVO\Downloads\AIRS.2025.05.01.L3.RetStd_IR031.v7.0.7.0.G25188113435.hdf.nc4")

print(ds)  # shows variables, dimensions, attributes

<xarray.Dataset> Size: 264kB
Dimensions:     (Latitude: 180, Longitude: 360)
Coordinates:
  * Longitude   (Longitude) float64 3kB -180.0 -179.0 -178.0 ... 178.0 179.0
  * Latitude    (Latitude) float64 1kB 90.0 89.0 88.0 87.0 ... -87.0 -88.0 -89.0
Data variables:
    CloudFrc_A  (Latitude, Longitude) float32 259kB ...
Attributes: (12/420)
    HDFEOSVersion:                                        HDFEOS_V2.18
    identifier_product_doi:                               10.5067/UBENJB9D3T2H
    identifier_product_doi_authority:                     http://dx.doi.org/
    history:                                              2025-10-05 17:50:51...
    history_json:                                         [{"$schema":"https:...
    ascending._FV_TotalCounts_A:                          -9999.0
    ...                                                   ...
    location.H2OPressureLev:                              1000.0
    location.H2OPressureLay:                              961.7692
    locati

In [9]:
# show just the variable names
print("Variables:", list(ds.variables))


Variables: ['CloudFrc_A', 'Longitude', 'Latitude']


In [10]:
# Convert to a DataFrame (flattened)
df = ds.to_dataframe().reset_index()
df.head() # show first few rows

Unnamed: 0,Latitude,Longitude,CloudFrc_A
0,90.0,-180.0,0.052002
1,90.0,-179.0,0.609375
2,90.0,-178.0,0.0
3,90.0,-177.0,0.431641
4,90.0,-176.0,0.226562


In [5]:
#Check the shape of the dataframe
print(df.shape)

(1728000, 6)


In [10]:
#Check null values
print(df.isnull().sum())

time                0
bnds                0
lon                 0
lat                 0
time_bnds           0
Swnet_tavg    1242494
dtype: int64


In [5]:
# Access a variable (e.g., "CO2")
co2 = ds["xco2"]
print(co2)

<xarray.DataArray 'xco2' (sounding_id: 933)> Size: 4kB
[933 values with dtype=float32]
Coordinates:
    longitude    (sounding_id) float32 4kB ...
    latitude     (sounding_id) float32 4kB ...
  * sounding_id  (sounding_id) float64 7kB 2.009e+13 2.009e+13 ... 2.009e+13
Attributes:
    units:      ppm
    long_name:  XCO2
    comment:    Column-averaged dry-air mole fraction of CO2 (includes bias c...


In [11]:
#Export to CSV
df.to_csv(r"C:\Users\LENOVO\Downloads\Co2.csv")

# Merge NC4 to CSV

In [None]:
# 1️⃣ Folder containing your .nc4 files
folder = r"C:\Users\LENOVO\Downloads\NetShortwave" # change to your folder path

# 2️⃣ Find all .nc4 files in that folder
files = sorted(glob.glob(os.path.join(folder, "*.nc4")))
print(f"Found {len(files)} files")

# 3️⃣ Store all dataframes
all_dfs = []

# 4️⃣ Loop through files
for f in files:
    try:
        ds = xr.open_dataset(f)
        print(f"Processing {os.path.basename(f)}")

        # Extract only desired columns
        df = ds[].to_dataframe().reset_index()

        # Optional: drop duplicates or NaN if any
        #df = df.dropna(subset=["SWnet"])

        all_dfs.append(df)
        ds.close()

    except Exception as e:
        print(f"⚠️ Error reading {f}: {e}")

# 5️⃣ Merge all DataFrames
if all_dfs:
    merged_df = pd.concat(all_dfs, ignore_index=True)
    print(f"✅ Merged shape: {merged_df.shape}")

    # 6️⃣ Export to CSV
    merged_df.to_csv(r"C:\Users\LENOVO\Downloads\Except_Cloud.csv", index=False)
    print("💾 Saved to merged_SWnet_data.csv")
else:
    print("❌ No data to merge — check your files or variable names.")

Found 24 files
Processing GLDAS_NOAH10_M.A202301.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202302.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202303.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202304.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202305.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202306.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202307.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202308.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202309.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202310.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202311.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202312.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202401.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202402.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202403.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202404.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202405.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202406.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202407.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202408.0

### Export the entire CSV all Variables 

In [4]:
import xarray as xr
import pandas as pd

# (assuming you already opened ds)
ds = xr.open_dataset(r"C:\Users\LENOVO\Downloads\3A12.20150301.7.HDF.nc4")

# Convert all variables into a DataFrame
df = ds.to_dataframe().reset_index()

# Show first few rows
print(df.head())



     nlon   nlat  nlayer  surfacePrecipitation  surfaceRain  \
0 -179.75 -39.75     0.5                   NaN          NaN   
1 -179.75 -39.75     1.0                   NaN          NaN   
2 -179.75 -39.75     1.5                   NaN          NaN   
3 -179.75 -39.75     2.0                   NaN          NaN   
4 -179.75 -39.75     2.5                   NaN          NaN   

   convectPrecipitation  cldWater  rainWater  cldIce  snow  graupel  \
0                   NaN       NaN        NaN     NaN   NaN      NaN   
1                   NaN       NaN        NaN     NaN   NaN      NaN   
2                   NaN       NaN        NaN     NaN   NaN      NaN   
3                   NaN       NaN        NaN     NaN   NaN      NaN   
4                   NaN       NaN        NaN     NaN   NaN      NaN   

   latentHeat  npixTotal  npixPrecipitation  fractionQuality0  \
0         NaN        0.0                0.0               NaN   
1         NaN        0.0                0.0               NaN   

In [7]:
# Export to CSV
df.to_csv(r"C:\Users\LENOVO\Downloads\test.csv", index=False)

In [5]:
# show just the variable names
print("Variables:", df.columns.tolist())

Variables: ['nlon', 'nlat', 'nlayer', 'surfacePrecipitation', 'surfaceRain', 'convectPrecipitation', 'cldWater', 'rainWater', 'cldIce', 'snow', 'graupel', 'latentHeat', 'npixTotal', 'npixPrecipitation', 'fractionQuality0', 'fractionQuality1', 'fractionQuality2']


In [None]:
#NC4 is a very large so filter
df_small = df[["latitude", "longitude", "time", "xco2", "xco2_uncertainty"]]
df_small.to_csv(r"D:\Hackathon\Dataset\ACOS & OCO\ACOS\Acos_co2.csv", index=False)

In [None]:
#Read_csv
df = pd.read_csv(r"C:\Users\LENOVO\Downloads\Acos_Co2.csv")
df.head()

# Automate download data and merge 

In [15]:
import os
import requests
import xarray as xr
import pandas as pd

# 1️⃣ Paths
links_file = r"C:\Users\LENOVO\Downloads\Rain&SnowList.txt"
download_dir = r"C:\Users\LENOVO\Downloads\Rain&Snow"
os.makedirs(download_dir, exist_ok=True)

# 2️⃣ NASA Earthdata token
token = "eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIjoiVXNlciIsInVpZCI6ImtpZXRkYXRhY3V0ZSIsImV4cCI6MTc2NDg1NzQ4NywiaWF0IjoxNzU5NjczNDg3LCJpc3MiOiJodHRwczovL3Vycy5lYXJ0aGRhdGEubmFzYS5nb3YiLCJpZGVudGl0eV9wcm92aWRlciI6ImVkbF9vcHMiLCJhY3IiOiJlZGwiLCJhc3N1cmFuY2VfbGV2ZWwiOjN9.prA4yN8UfzIAFYyJ5HLaYg-_MWCGJ72pT89NVpXy9iTUNeh431WGdwAglnC9T3Cr7MkYgEisSmalksN3HoHuruZs06oPyTp0IycGh9lQio8sozZSTtdtp8oI_p5IVhSHasM6X4EtbMQpLwxXfMDsaliAKLbYPH4Nytmr04ezwID9uyWtmy5Y6AesyUZ7zCtaoFcmgPAfn__oYhu133yW_9WIsQYx8Xs1HZgyvMNB7q2cYAfITX5RP2ZbPiFJRDaZUXEe5VplM4t5ZluOOnNJabHvm3J5mCJ98VSuFvhbyhzdhVQHq_G8pN_jfMmDBe63jogWZx-HBuxaRSsNRE3uiA"  # 👈 paste your token

headers = {"Authorization": f"Bearer {token}"}

# 3️⃣ Read link
with open(links_file, "r") as f:
    links = [line.strip() for line in f if line.strip()]

all_data = []

# 4️⃣ Loop through each link
for i, url in enumerate(links, 1):
    try:
        filename = os.path.join(download_dir, os.path.basename(url))

        # --- download if not already ---
        if not os.path.exists(filename):
            print(f"📥 Downloading {i}/{len(links)}: {url}")
            response = requests.get(url, headers=headers)
            response.raise_for_status()

            with open(filename, "wb") as f:
                f.write(response.content)

        # --- open dataset ---
        ds = xr.open_dataset(filename)

        # --- extract key variables ---
        df = ds[['time', 'time_bnds', 'lon', 'lat', 'Snowf_tavg', 'Rainf_f_tavg']].to_dataframe().reset_index()

        all_data.append(df)
        ds.close()

    except Exception as e:
        print(f"⚠️ Error processing {url}: {type(e).__name__} - {e}")

# 5️⃣ Combine all datasets
if all_data:
    merged_df = pd.concat(all_data, ignore_index=True)
    merged_df.to_csv("oco2_combined.csv", index=False)
    print("✅ Done! Saved as 'oco2_combined.csv'")
else:
    print("❌ No data was successfully loaded. Check your token or network.")


⚠️ Error processing https://docserver.gesdisc.eosdis.nasa.gov/public/project/hydrology/README_GLDAS2.pdf: ValueError - did not find a match in any of xarray's currently installed IO backends ['netcdf4', 'scipy']. Consider explicitly selecting one of the installed engines via the ``engine`` parameter, or installing additional IO dependencies, see:
https://docs.xarray.dev/en/stable/getting-started-guide/installing.html
https://docs.xarray.dev/en/stable/user-guide/io.html
📥 Downloading 2/25: https://hydro1.gesdisc.eosdis.nasa.gov/daac-bin/OTF/HTTP_services.cgi?FILENAME=%2Fdata%2FGLDAS%2FGLDAS_NOAH025_M.2.1%2F2023%2FGLDAS_NOAH025_M.A202301.021.nc4&VERSION=1.02&FORMAT=bmM0Lw&BBOX=-60%2C-180%2C90%2C180&SHORTNAME=GLDAS_NOAH025_M&DATASET_VERSION=2.1&SERVICE=L34RS_LDAS&LABEL=GLDAS_NOAH025_M.A202301.021.nc4.SUB.nc4&VARIABLES=Rainf_f_tavg%2CSnowf_tavg
⚠️ Error processing https://hydro1.gesdisc.eosdis.nasa.gov/daac-bin/OTF/HTTP_services.cgi?FILENAME=%2Fdata%2FGLDAS%2FGLDAS_NOAH025_M.2.1%2F2023%2FG

KeyboardInterrupt: 

In [8]:
# 5️⃣ Combine all daily datasets
merged_df = pd.concat(all_data, ignore_index=True)

# 6️⃣ Export to CSV
merged_df.to_csv(r"C:\Users\LENOVO\Downloads\co2.csv", index=False)

print("✅ Done! Saved as 'oco2_combined.csv'")

✅ Done! Saved as 'oco2_combined.csv'


# CDF File 

In [9]:
# Path to your file
file_path = r"C:\Users\LENOVO\Downloads\WindSpeed\MERRA2_400.instM_2d_lfo_Nx.202301.SUB.nc"

# Open the NetCDF file
ds = xr.open_dataset(file_path)

# Display dataset info
print(ds)

<xarray.Dataset> Size: 839kB
Dimensions:   (time: 1, lon: 576, lat: 361)
Coordinates:
  * time      (time) datetime64[ns] 8B 2023-01-01
  * lon       (lon) float64 5kB -180.0 -179.4 -178.8 ... 178.1 178.8 179.4
  * lat       (lat) float64 3kB -90.0 -89.5 -89.0 -88.5 ... 88.5 89.0 89.5 90.0
Data variables:
    SPEEDLML  (time, lat, lon) float32 832kB ...
Attributes: (12/32)
    CDI:                               Climate Data Interface version 1.9.8 (...
    Conventions:                       CF-1
    Contact:                           http://gmao.gsfc.nasa.gov
    History:                           Original file generated: Sun Feb 12 02...
    Filename:                          MERRA2_400.instM_2d_lfo_Nx.202301.nc4
    Comment:                           GMAO filename: d5124_m2_jan10.inst1_2d...
    ...                                ...
    DataResolution:                    0.5 x 0.625
    identifier_product_doi:            10.5067/11F99Y6TXN99
    RangeBeginningTime:                00

In [10]:
#see the variables
print("Variables:", list(ds.variables))

Variables: ['time', 'lon', 'lat', 'SPEEDLML']


In [11]:
#change into dataframe
df = ds.to_dataframe().reset_index()
df.head()

Unnamed: 0,time,lon,lat,SPEEDLML
0,2023-01-01,-180.0,-90.0,5.493866
1,2023-01-01,-180.0,-89.5,4.914066
2,2023-01-01,-180.0,-89.0,4.676849
3,2023-01-01,-180.0,-88.5,5.066791
4,2023-01-01,-180.0,-88.0,6.082662


## Merge all CDF file 

In [16]:


# Folder containing your .nc files
folder = r"C:\Users\LENOVO\Downloads\AirTemp"

# Step 1: Find all .nc or .nc4 files in the folder
files = glob.glob(os.path.join(folder, "*.nc")) + glob.glob(os.path.join(folder, "*.nc4"))

print(f"✅ Found {len(files)} files")

# Step 2: Open and combine them
ds = xr.open_mfdataset(files, combine='by_coords')

# Step 3: Convert to DataFrame and save
df = ds.to_dataframe().reset_index()
df.to_csv(r"C:\Users\LENOVO\Downloads\AirTemp.csv", index=False)
print("✅ Saved merged_cloud_data.csv")



✅ Found 24 files
✅ Saved merged_cloud_data.csv


# Fix the spatial resolution 

In [16]:
import xarray as xr
import numpy as np

# Open the cloud dataset
cloud_ds = pd.read_csv(r"C:\Users\LENOVO\Downloads\Cloud.csv")  # replace with your file
# Assume cloud variable is 'CloudFrc'

# Create 1° bins for lon and lat
cloud_ds['lon_bin'] = np.round(cloud_ds['lon'])
cloud_ds['lat_bin'] = np.round(cloud_ds['lat'])

# Group by the new 1° bins and take the mean
cloud_agg = cloud_ds.groupby(['time', 'lon_bin', 'lat_bin']).mean()

# Rename bins to standard lon/lat for merging
cloud_agg = cloud_agg.rename({'lon_bin': 'lon', 'lat_bin': 'lat'})

# Convert to DataFrame if needed
cloud_agg_df = cloud_agg.reset_index()

# Assuming your DataFrame is named df
cloud_agg_df['time'] = pd.to_datetime(cloud_agg_df['time']).dt.strftime('%Y-%m-%d')

# Keep only the needed columns
cloud_agg_df = cloud_agg_df[['time', 'lon', 'lat', 'CLDTOT']]

# Display result
cloud_agg_df

Unnamed: 0,time,lon,lat,CLDTOT
0,2023-01-01,-180.0000,-89.75,0.497406
1,2023-01-01,-180.0000,-89.00,0.554248
2,2023-01-01,-180.0000,-88.00,0.542849
3,2023-01-01,-180.0000,-87.00,0.502070
4,2023-01-01,-180.0000,-86.00,0.459127
...,...,...,...,...
1563835,2024-12-01,179.0625,86.00,0.966063
1563836,2024-12-01,179.0625,87.00,0.950423
1563837,2024-12-01,179.0625,88.00,0.937046
1563838,2024-12-01,179.0625,89.00,0.955067


In [17]:
#Export to CSV
cloud_agg_df.to_csv(r"C:\Users\LENOVO\Downloads\Cloud1X1.csv", index=False)

# merge NC4 into CSV 

In [13]:
# 1️⃣ Folder containing your .nc4 files
folder = r"C:\Users\LENOVO\Downloads\All_except_Cloud" # change to your folder path

# 2️⃣ Find all .nc4 files in that folder
files = sorted(glob.glob(os.path.join(folder, "*.nc4")))
print(f"Found {len(files)} files")

# 3️⃣ Store all dataframes
all_dfs = []

# 4️⃣ Loop through files
for f in files:
    try:
        ds = xr.open_dataset(f)
        print(f"Processing {os.path.basename(f)}")

        # Extract only desired columns
        df = ds[['time', 'time_bnds', 'lon', 'lat', 'Swnet_tavg', 'Snowf_tavg', 'Rainf_tavg', 'Wind_f_inst', 'Tair_f_inst']].to_dataframe().reset_index()

        # Optional: drop duplicates or NaN if any
        #df = df.dropna(subset=["SWnet"])

        all_dfs.append(df)
        ds.close()

    except Exception as e:
        print(f"⚠️ Error reading {f}: {e}")

# 5️⃣ Merge all DataFrames
if all_dfs:
    merged_df = pd.concat(all_dfs, ignore_index=True)
    print(f"✅ Merged shape: {merged_df.shape}")

    # 6️⃣ Export to CSV
    merged_df.to_csv(r"C:\Users\LENOVO\Downloads\Except_Cloud.csv", index=False)
    print("💾 Saved to merged_SWnet_data.csv")
else:
    print("❌ No data to merge — check your files or variable names.")

Found 24 files
Processing GLDAS_NOAH10_M.A202301.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202302.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202303.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202304.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202305.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202306.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202307.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202308.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202309.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202310.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202311.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202312.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202401.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202402.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202403.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202404.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202405.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202406.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202407.021.nc4.SUB.nc4
Processing GLDAS_NOAH10_M.A202408.0

# See data

In [2]:
df = pd.read_csv(r"C:\Users\LENOVO\Downloads\Except_Cloud.csv")
df

Unnamed: 0,time,bnds,lon,lat,time_bnds,Swnet_tavg,Snowf_tavg,Rainf_tavg,Wind_f_inst,Tair_f_inst
0,2023-01-01,0,-179.5,-59.5,2023-01-01,,,,,
1,2023-01-01,0,-179.5,-58.5,2023-01-01,,,,,
2,2023-01-01,0,-179.5,-57.5,2023-01-01,,,,,
3,2023-01-01,0,-179.5,-56.5,2023-01-01,,,,,
4,2023-01-01,0,-179.5,-55.5,2023-01-01,,,,,
...,...,...,...,...,...,...,...,...,...,...
2591995,2024-12-01,1,179.5,85.5,2025-01-01,,,,,
2591996,2024-12-01,1,179.5,86.5,2025-01-01,,,,,
2591997,2024-12-01,1,179.5,87.5,2025-01-01,,,,,
2591998,2024-12-01,1,179.5,88.5,2025-01-01,,,,,


In [3]:
#How many values in the time column group, organize by time ascending order
df['time'].value_counts().sort_index()




time
2023-01-01    108000
2023-02-01    108000
2023-03-01    108000
2023-04-01    108000
2023-05-01    108000
2023-06-01    108000
2023-07-01    108000
2023-08-01    108000
2023-09-01    108000
2023-10-01    108000
2023-11-01    108000
2023-12-01    108000
2024-01-01    108000
2024-02-01    108000
2024-03-01    108000
2024-04-01    108000
2024-05-01    108000
2024-06-01    108000
2024-07-01    108000
2024-08-01    108000
2024-09-01    108000
2024-10-01    108000
2024-11-01    108000
2024-12-01    108000
Name: count, dtype: int64

In [4]:
#see data shape 
df.shape

(2592000, 10)

In [5]:
#Total null values in each column
df.isnull().sum()

time                 0
bnds                 0
lon                  0
lat                  0
time_bnds            0
Swnet_tavg     1864368
Snowf_tavg     1864368
Rainf_tavg     1864368
Wind_f_inst    1856400
Tair_f_inst    1856400
dtype: int64

In [6]:
#remove null values
df = df.dropna()
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 727632 entries, 126 to 2591978
Data columns (total 10 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   time         727632 non-null  object 
 1   bnds         727632 non-null  int64  
 2   lon          727632 non-null  float64
 3   lat          727632 non-null  float64
 4   time_bnds    727632 non-null  object 
 5   Swnet_tavg   727632 non-null  float64
 6   Snowf_tavg   727632 non-null  float64
 7   Rainf_tavg   727632 non-null  float64
 8   Wind_f_inst  727632 non-null  float64
 9   Tair_f_inst  727632 non-null  float64
dtypes: float64(7), int64(1), object(2)
memory usage: 61.1+ MB


In [7]:
df



Unnamed: 0,time,bnds,lon,lat,time_bnds,Swnet_tavg,Snowf_tavg,Rainf_tavg,Wind_f_inst,Tair_f_inst
126,2023-01-01,0,-179.5,66.5,2023-01-01,0.889153,0.000012,0.0,3.502832,248.41788
127,2023-01-01,0,-179.5,67.5,2023-01-01,0.585806,0.000013,0.0,2.091139,245.06335
128,2023-01-01,0,-179.5,68.5,2023-01-01,0.290484,0.000014,0.0,2.810090,245.53134
131,2023-01-01,0,-179.5,71.5,2023-01-01,0.007903,0.000005,0.0,3.858478,245.66441
276,2023-01-01,0,-178.5,66.5,2023-01-01,1.018427,0.000012,0.0,3.856865,246.43977
...,...,...,...,...,...,...,...,...,...,...
2591828,2024-12-01,1,178.5,68.5,2025-01-01,0.000000,0.000011,0.0,1.411122,249.06163
2591975,2024-12-01,1,179.5,65.5,2025-01-01,0.206792,0.000019,0.0,3.541325,253.86658
2591976,2024-12-01,1,179.5,66.5,2025-01-01,0.020513,0.000015,0.0,3.845178,253.79471
2591977,2024-12-01,1,179.5,67.5,2025-01-01,0.000000,0.000014,0.0,2.293003,250.90630


In [8]:
#count time columns 
df['time'].value_counts().sort_index()

time
2023-01-01    30318
2023-02-01    30318
2023-03-01    30318
2023-04-01    30318
2023-05-01    30318
2023-06-01    30318
2023-07-01    30318
2023-08-01    30318
2023-09-01    30318
2023-10-01    30318
2023-11-01    30318
2023-12-01    30318
2024-01-01    30318
2024-02-01    30318
2024-03-01    30318
2024-04-01    30318
2024-05-01    30318
2024-06-01    30318
2024-07-01    30318
2024-08-01    30318
2024-09-01    30318
2024-10-01    30318
2024-11-01    30318
2024-12-01    30318
Name: count, dtype: int64