# Data Extraction


## Projemde .mat uzantılı dosyaları CSV formatına dönüştürdüm. Capacity değerlerini hesaplayarak yeni CSV dosyaları oluşturdum. Bu CSV dosyaları, .mat uzantılı dosyaların dönüştürülmesiyle elde edilen B0005, B0006, B0007 ve B0018 gibi dosyalardır.

Dataset link = https://phm-datasets.s3.amazonaws.com/NASA/5.+Battery+Data+Set.zip

In [31]:
import datetime
import numpy as np
import pandas as pd
from scipy.io import loadmat
from pandas import DataFrame

# Path to the .mat file of the battery you want to process
battery_file = r'./B0007.mat'  # put the file path here

#define a function for extracting discharge and charge data
def disch_data(battery_file):
  """Extract the discharge cycles of a battery .mat file into DataFrames.

  Every entry of the battery's ``cycle`` array whose ``type`` is
  ``'discharge'`` is flattened into one row per measurement sample.

  Args:
    battery_file: Path of the .mat file, passed straight to
      ``scipy.io.loadmat``.

  Returns:
    A two-element list ``[measurements, capacity]``:

    * ``measurements`` has the columns ``cycle``, ``ambient_temperature``,
      ``datetime``, ``capacity``, ``voltage_measured``, ``current_measured``,
      ``temperature_measured``, ``current_load``, ``voltage_load``, ``time``.
    * ``capacity`` has the columns ``cycle``, ``ambient_temperature``,
      ``datetime``, ``capacity``.

    ``cycle`` is a 1-based counter over discharge cycles only. Both frames
    are empty (but keep their columns) when the file has no discharge cycle.

  Raises:
    ValueError: If the file holds no variable besides loadmat's ``__*__``
      bookkeeping entries.
  """
  mat = loadmat(battery_file)

  # loadmat mixes bookkeeping keys ('__header__', ...) with the real
  # variables; pick the first real one by name instead of trusting a fixed
  # position in the key list.
  battery = next((key for key in mat if not key.startswith('__')), None)
  if battery is None:
    raise ValueError(f'No battery variable found in {battery_file!r}')

  # Resolve the nested struct once instead of on every loop iteration.
  cycles = mat[battery][0, 0]['cycle']
  total = len(cycles[0])
  print('Total data in dataset: ', total)

  # Per-sample arrays, in the order they appear in the output columns.
  sample_fields = ('Voltage_measured', 'Current_measured',
                   'Temperature_measured', 'Current_load',
                   'Voltage_load', 'Time')

  c = 0  # number of discharge cycles seen so far
  disdataset = []
  capacity_data = []

  for i in range(total):
    row = cycles[0, i]
    if row['type'][0] != 'discharge':
      continue

    ambient_temperature = row['ambient_temperature'][0][0]
    # 'time' is read as [year, month, day, hour, minute, second]; each part
    # is truncated to an int, so fractional seconds are dropped.
    stamp = row['time'][0]
    date_time = (datetime.datetime(*(int(part) for part in stamp[:5]))
                 + datetime.timedelta(seconds=int(stamp[5])))

    record = row['data'][0][0]
    capacity = record['Capacity'][0][0]
    series = [record[name] for name in sample_fields]

    # The number of samples is taken from 'Voltage_measured'.
    for j in range(len(series[0][0])):
      disdataset.append([c + 1, ambient_temperature, date_time, capacity]
                        + [values[0][j] for values in series])
      # NOTE(review): appended once per sample, so the capacity frame repeats
      # each cycle's row for every measurement. Kept for backward
      # compatibility; confirm whether one row per cycle was intended.
      capacity_data.append([c + 1, ambient_temperature, date_time, capacity])
    c += 1

  # Preview the first row; skipped when nothing was extracted so a file
  # without discharge cycles no longer fails with IndexError.
  if disdataset:
    print(disdataset[0])

  return [pd.DataFrame(data=disdataset,
                       columns=['cycle', 'ambient_temperature', 'datetime',
                                'capacity', 'voltage_measured',
                                'current_measured', 'temperature_measured',
                                'current_load', 'voltage_load', 'time']),
          pd.DataFrame(data=capacity_data,
                       columns=['cycle', 'ambient_temperature', 'datetime',
                                'capacity'])]

def charge_data(battery_file):
  """Extract the charge cycles of a battery .mat file into a DataFrame.

  Every entry of the battery's ``cycle`` array whose ``type`` is ``'charge'``
  is flattened into one row per measurement sample.

  Args:
    battery_file: Path of the .mat file, passed straight to
      ``scipy.io.loadmat``.

  Returns:
    A DataFrame with the columns ``cycle``, ``ambient_temperature``,
    ``datetime``, ``voltage_measured``, ``current_measured``,
    ``temperature_measured``, ``current_load``, ``voltage_load``, ``time``.
    ``cycle`` is a 1-based counter over charge cycles only. The frame is
    empty (but keeps its columns) when the file has no charge cycle.

    NOTE: the ``current_load`` and ``voltage_load`` columns hold the
    ``Current_charge`` and ``Voltage_charge`` arrays; the column names are
    kept as they were so existing callers keep working.

  Raises:
    ValueError: If the file holds no variable besides loadmat's ``__*__``
      bookkeeping entries.
  """
  mat = loadmat(battery_file)

  # loadmat mixes bookkeeping keys ('__header__', ...) with the real
  # variables; pick the first real one by name instead of trusting a fixed
  # position in the key list.
  battery = next((key for key in mat if not key.startswith('__')), None)
  if battery is None:
    raise ValueError(f'No battery variable found in {battery_file!r}')

  # Resolve the nested struct once instead of on every loop iteration.
  cycles = mat[battery][0, 0]['cycle']

  # Per-sample arrays, in the order they appear in the output columns.
  sample_fields = ('Voltage_measured', 'Current_measured',
                   'Temperature_measured', 'Current_charge',
                   'Voltage_charge', 'Time')

  c = 0  # number of charge cycles seen so far
  chdataset = []

  for i in range(len(cycles[0])):
    row = cycles[0, i]
    if row['type'][0] != 'charge':
      continue

    ambient_temperature = row['ambient_temperature'][0][0]
    # 'time' is read as [year, month, day, hour, minute, second]; each part
    # is truncated to an int, so fractional seconds are dropped.
    stamp = row['time'][0]
    date_time = (datetime.datetime(*(int(part) for part in stamp[:5]))
                 + datetime.timedelta(seconds=int(stamp[5])))

    record = row['data'][0][0]
    series = [record[name] for name in sample_fields]

    # The number of samples is taken from 'Voltage_measured'.
    for j in range(len(series[0][0])):
      chdataset.append([c + 1, ambient_temperature, date_time]
                       + [values[0][j] for values in series])
    c += 1

  # Preview the first row; skipped when nothing was extracted so a file
  # without charge cycles no longer fails with IndexError.
  if chdataset:
    print(chdataset[0])

  return pd.DataFrame(data=chdataset,
                      columns=['cycle', 'ambient_temperature', 'datetime',
                               'voltage_measured', 'current_measured',
                               'temperature_measured', 'current_load',
                               'voltage_load', 'time'])


# Unpack the two DataFrames returned by disch_data
dis_df, dis_capacity_df = disch_data(battery_file)

# Show up to 10 columns, then print the first rows of each DataFrame
pd.options.display.max_columns = 10
print("Discharge DataFrame:", dis_df.head(), sep="\n")
print("\nCapacity DataFrame:", dis_capacity_df.head(), sep="\n")


Total data in dataset:  616
[1, 24, datetime.datetime(2008, 4, 2, 15, 25, 41), 1.89105229539079, 4.199359521791473, -0.0018658918833680202, 23.93704394613529, -0.0004, 0.0, 0.0]
Discharge DataFrame:
   cycle  ambient_temperature            datetime  capacity  voltage_measured  \
0      1                   24 2008-04-02 15:25:41  1.891052          4.199360   
1      1                   24 2008-04-02 15:25:41  1.891052          4.199497   
2      1                   24 2008-04-02 15:25:41  1.891052          3.985606   
3      1                   24 2008-04-02 15:25:41  1.891052          3.963247   
4      1                   24 2008-04-02 15:25:41  1.891052          3.946647   

   current_measured  temperature_measured  current_load  voltage_load    time  
0         -0.001866             23.937044       -0.0004         0.000   0.000  
1         -0.002139             23.924074       -0.0004         4.215  16.781  
2         -1.988778             24.004257       -2.0000         3.003  35.

In [32]:

# Write the discharge DataFrame to a CSV file, leaving out the row index
# NOTE(review): the file name looks like a placeholder — confirm the intended name
csv_file_path = 'B000asdfg7.csv'
dis_df.to_csv(path_or_buf=csv_file_path, index=False)
print(f"Data has been saved to {csv_file_path}")


Data has been saved to B000asdfg7.csv
