In [1]:
import netCDF4 as nc
import pandas as pd
import numpy as np

# Open file
ds = nc.Dataset(r"D:\downloads\20250326-050935Z-032951Z-dayComposite-metop-c-avhrr.nc")

In [2]:
# lat, lon
lat = ds.variables['lat'][:]
lon = ds.variables['lon'][:]

# bands (they are 2D arrays of shape lat x lon)
band1 = ds.variables['band1'][:]
band2 = ds.variables['band2'][:]
band3 = ds.variables['band3a'][:]   # note: name is 'band3a'
band4 = ds.variables['band4'][:]
band5 = ds.variables['band5'][:]

print("Shapes:", lat.shape, lon.shape, band1.shape)

Shapes: (5143,) (6243,) (5143, 6243)


In [3]:
# create meshgrid of lat/lon (2D grid)
lon_grid, lat_grid = np.meshgrid(lon, lat)

# flatten everything
df = pd.DataFrame({
    "lat": lat_grid.flatten(),
    "lon": lon_grid.flatten(),
    "band1": band1.flatten(),
    "band2": band2.flatten(),
    "band3": band3.flatten(),
    "band4": band4.flatten(),
    "band5": band5.flatten()
})

In [4]:
# save to CSV
df.to_csv("processed.csv", index=False)

print("Saved processed.csv with shape:", df.shape)

Saved processed.csv with shape: (32107749, 7)


In [5]:
df = df.iloc[::50, :]   # take every 50th row

In [7]:
import plotly.express as px
import streamlit as st
df = pd.read_csv("processed.csv")
fig = px.scatter(df, x="lon", y="lat", color="band1", title="Band1 Visualization")
st.plotly_chart(fig)

2025-08-28 15:15:31.912 
  command:

    streamlit run C:\Users\Ayush\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\ipykernel_launcher.py [ARGUMENTS]


DeltaGenerator()

In [1]:
import netCDF4 as nc

ds = nc.Dataset("data/D1901800_000.nc")
print(ds.variables.keys())

dict_keys(['DATA_TYPE', 'FORMAT_VERSION', 'HANDBOOK_VERSION', 'REFERENCE_DATE_TIME', 'DATE_CREATION', 'DATE_UPDATE', 'PLATFORM_NUMBER', 'PROJECT_NAME', 'PI_NAME', 'STATION_PARAMETERS', 'CYCLE_NUMBER', 'DIRECTION', 'DATA_CENTRE', 'DC_REFERENCE', 'DATA_STATE_INDICATOR', 'DATA_MODE', 'PLATFORM_TYPE', 'FLOAT_SERIAL_NO', 'FIRMWARE_VERSION', 'WMO_INST_TYPE', 'JULD', 'JULD_QC', 'JULD_LOCATION', 'LATITUDE', 'LONGITUDE', 'POSITION_QC', 'POSITIONING_SYSTEM', 'PROFILE_PRES_QC', 'PROFILE_TEMP_QC', 'PROFILE_PSAL_QC', 'VERTICAL_SAMPLING_SCHEME', 'CONFIG_MISSION_NUMBER', 'PRES', 'PRES_QC', 'PRES_ADJUSTED', 'PRES_ADJUSTED_QC', 'PRES_ADJUSTED_ERROR', 'TEMP', 'TEMP_QC', 'TEMP_ADJUSTED', 'TEMP_ADJUSTED_QC', 'TEMP_ADJUSTED_ERROR', 'PSAL', 'PSAL_QC', 'PSAL_ADJUSTED', 'PSAL_ADJUSTED_QC', 'PSAL_ADJUSTED_ERROR', 'PARAMETER', 'SCIENTIFIC_CALIB_EQUATION', 'SCIENTIFIC_CALIB_COEFFICIENT', 'SCIENTIFIC_CALIB_COMMENT', 'SCIENTIFIC_CALIB_DATE', 'HISTORY_INSTITUTION', 'HISTORY_STEP', 'HISTORY_SOFTWARE', 'HISTORY_SOFTW

In [10]:
import netCDF4 as nc
import pandas as pd
import numpy as np

# Load one profile file

ds = nc.Dataset(r"D:\Coding\Ml_Projects\SIH\Data\D1901800_000.nc")

# Extract variables safely
pres = ds["PRES_ADJUSTED"][:].filled(np.nan) if "PRES_ADJUSTED" in ds.variables else ds["PRES"][:].filled(np.nan)
temp = ds["TEMP_ADJUSTED"][:].filled(np.nan) if "TEMP_ADJUSTED" in ds.variables else ds["TEMP"][:].filled(np.nan)
psal = ds["PSAL_ADJUSTED"][:].filled(np.nan) if "PSAL_ADJUSTED" in ds.variables else ds["PSAL"][:].filled(np.nan)

lat = float(ds["LATITUDE"][0]) if "LATITUDE" in ds.variables else np.nan
lon = float(ds["LONGITUDE"][0]) if "LONGITUDE" in ds.variables else np.nan
time = pd.to_datetime(ds["JULD"][:], origin="1950-01-01", unit="D")[0] if "JULD" in ds.variables else None

platform = "".join([p.tobytes().decode('utf-8').strip() for p in ds["PLATFORM_NUMBER"][:]]) if "PLATFORM_NUMBER" in ds.variables else "unknown"
cycle = int(ds["CYCLE_NUMBER"][0]) if "CYCLE_NUMBER" in ds.variables else -1

if "PLATFORM_NUMBER" in ds.variables:
    try:
        platform = "".join([p.tobytes().decode('utf-8').strip() for p in ds["PLATFORM_NUMBER"][:]])
    except Exception:
        platform = str(ds["PLATFORM_NUMBER"][:])
else:
    platform = "unknown"

# Build dataframe
df = pd.DataFrame({
    "platform_number": platform,
    "cycle_number": cycle,
    "time": time,
    "latitude": lat,
    "longitude": lon,
    "pressure_dbar": pres.flatten(),
    "temperature_C": temp.flatten(),
    "salinity_psu": psal.flatten()
}).dropna()

# Save CSV
df.to_csv("argo_profile.csv", index=False)
print("✅ Saved argo_profile.csv")
print(df.head())


✅ Saved argo_profile.csv
   platform_number  cycle_number                          time  latitude  \
13  19018001901800             0 2016-02-22 13:36:31.391999853 -48.60123   
14  19018001901800             0 2016-02-22 13:36:31.391999853 -48.60123   
15  19018001901800             0 2016-02-22 13:36:31.391999853 -48.60123   
16  19018001901800             0 2016-02-22 13:36:31.391999853 -48.60123   
17  19018001901800             0 2016-02-22 13:36:31.391999853 -48.60123   

    longitude  pressure_dbar  temperature_C  salinity_psu  
13  -20.01239      16.040001          5.964     33.834999  
14  -20.01239      17.959999          5.964     33.834999  
15  -20.01239      19.879999          5.964     33.834999  
16  -20.01239      22.040001          5.960     33.834999  
17  -20.01239      24.080000          5.959     33.834999  
