<a href="https://colab.research.google.com/github/FrancisLa/ACFAS/blob/main/0_Colab_setup.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#TITLE
This is where you explain what this code does.

#Initialization

##Import standard libraries

In [None]:
import os
import sys
import subprocess
from datetime import datetime
import numpy as np
import pickle
import re
import pandas as pd
import importlib
import pkg_resources
import types

##Import non standard libraries

In [None]:
def import_or_install(package, version=None):
    """Import ``package``, installing it with pip first when necessary.

    Args:
        package: Name used both as the importable module name and as the
            pip requirement name.
        version: Optional exact version string. When given, the installed
            distribution's version is compared with it and
            ``package==version`` is installed on a mismatch or when the
            distribution is not found.

    Returns:
        None. The imported module is bound to ``package`` in ``globals()``.

    Raises:
        subprocess.CalledProcessError: If the pip command fails.
        ImportError: If the module still cannot be imported afterwards.
    """
    try:
        if version is None:
            importlib.import_module(package)
            needs_install = False
        else:
            # Explicit comparison instead of `assert`, which is removed
            # when Python runs with -O.
            installed = pkg_resources.get_distribution(package).version
            needs_install = installed != version
    except (ImportError, pkg_resources.DistributionNotFound):
        needs_install = True

    if needs_install:
        requirement = package if version is None else f"{package}=={version}"
        subprocess.check_call([sys.executable, "-m", "pip", "install", requirement])
        # Let the import system notice files written by pip during this session.
        importlib.invalidate_caches()

    # Done outside any `finally` so a pip failure surfaces as itself rather
    # than being replaced by a follow-up ImportError.
    globals()[package] = importlib.import_module(package)

# Make each of these modules importable (installing it if needed) and bind it
# under its own name in the global namespace.
for _module_name in ('platform', 'socket', 'uuid', 'psutil', 'GPUtil'):
    import_or_install(_module_name)

##Project name, path and authorization


This code uses Google Drive to access the data. The project_name is used as the name of the main project directory in Google Drive (inside "Colab Data"). The notebook can be run locally (hosted = False) or through Colab hosting (hosted = True).

In [None]:
# Name of the project; used as the project folder name under "Colab Data"
# and as a prefix of the input/output file names.
project_name = 'Start'
# Name of this operation; used as a sub-folder of the project folder and in
# the output file names.
operation_name = 'up'
# True selects the Colab-hosted setup cell, False selects the local setup cell.
hosted = True

###Colab hosting

In [None]:
if hosted:
  # Imported here because google.colab is only needed on the hosted path.
  from google.colab import drive
  drive.mount('/content/gdrive',force_remount=True)
  # Single root for every directory below, so the three paths cannot drift apart.
  colab_data_dir = '/content/gdrive/My Drive/Colab Data'
  project_dir = colab_data_dir + '/' + project_name
  operation_dir = project_dir + '/' + operation_name
  tools_dir = colab_data_dir + '/Tools'
  # Create the operation folder (and any missing parents) on first run.
  os.makedirs(operation_dir, exist_ok=True)

Mounted at /content/gdrive


###Local hosting
A local runtime needs some preparation (https://research.google.com/colaboratory/local-runtimes.html)

It might be safer to work with python 3.7.9 (https://python.org/downloads/release/python-379/)

You may need to run this command line :

jupyter notebook --NotebookApp.allow_origin='https://colab.research.google.com' --port=9090 --no-browser

You may use google drive by installing gdrive locally.

In [None]:
if not hosted:
  # Work from the root of the Google Drive volume; the folder name depends on
  # the Drive client's language, so try the French name first, then the English one.
  try:
    os.chdir(os.path.join('G:',os.sep,'Mon disque'))
  except OSError:
    # Only a failed chdir (missing folder, permissions, ...) triggers the fallback.
    os.chdir(os.path.join('G:',os.sep,'My Drive'))
  # Paths below are relative to the directory selected above.
  project_dir = os.path.join('Colab Data',project_name)
  operation_dir = os.path.join('Colab Data',project_name,operation_name)
  tools_dir = os.path.join('Colab Data','Tools')
  os.makedirs(operation_dir, exist_ok=True)

## Install requirements

If *operation_dir* contains a text file named *project_name_operation_name_requirements.txt* with one `package==version` entry per line (e.g. 're==' or 'pandas==2.0.3'), this code prints every package that has a version.

In [None]:
# Print the pinned packages listed in this operation's requirements file, if it exists.
requirements_path = os.path.join(operation_dir, project_name+'_'+operation_name+'_requirements.txt')
if os.path.isfile(requirements_path):
  with open(requirements_path, 'r') as file_:
    for line in file_:
      line = line.strip()
      if not line:
        # Blank lines carry no requirement.
        continue
      # partition() never raises: a line without '==' simply yields an empty version.
      package, _, version = line.partition('==')
      if version:
        print(f"Package {package}, version {version}")

Import or install special needed package

In [None]:
#import_or_install('needed package')

Run special command in command prompt

In [None]:
command = ""  # put any command needed here
# Run the command through the shell, capturing its output as text.
result = subprocess.run(command, shell=True, capture_output=True, text=True)

# A non-zero exit status reports stderr; otherwise show what the command printed.
if result.returncode != 0:
    print("Error:", result.stderr)
else:
    print(result.stdout)

##Import data

Assuming the data is a pandas DataFrame serialized with pickle, let's import the data.

In [None]:
# Load the input DataFrame from <project_dir>/<operation_name>/<project_name>_previous_operation_name.pkl.
# NOTE(review): 'previous_operation_name' looks like a placeholder to replace with the
# name of the operation that produced the file — confirm.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
DF = pd.read_pickle(os.path.join(project_dir, operation_name, project_name+'_previous_operation_name.pkl'))

#Step 1

##Substep 1

#Export data

##System information

Record the type of machine on which this code was executed.

In [None]:
# Collect a description of the machine running this notebook.
# Keys are inserted in the order they should appear as rows of DF_info.
info={}
info['time'] = datetime.now()
info['python-version'] = platform.python_version()
info['platform'] = platform.system()
info['platform-release'] = platform.release()
info['platform-version'] = platform.version()
info['architecture'] = platform.machine()
info['hostname'] = socket.gethostname()
try:
  info['ip-address'] = socket.gethostbyname(info['hostname'])
except OSError:
  # The hostname may not resolve (socket.gaierror is an OSError); this is
  # best-effort like the other optional fields.
  info['ip-address'] = 'unknown'
# uuid.getnode() is formatted as 12 hex digits, then split into colon-separated pairs.
info['mac-address'] = ':'.join(re.findall('..', '%012x' % uuid.getnode()))
info['processor'] = platform.processor()
info['threads'] = str(psutil.cpu_count()) + ' logical cores'
try:
  info['speed'] = str(round(psutil.cpu_freq().current)) + " Mhz (currently)"
except Exception:
  # Best-effort: any failure to read the CPU frequency is reported as unknown.
  info['speed'] = 'unknown'
info['ram'] = str(round(psutil.virtual_memory().total / (1024.0 **3)))+' Go'
info['disk'] = str(round(psutil.disk_usage('/').total / (1024.0 **3)))+' Go'
# Query the GPUs once so the three GPU fields come from the same snapshot.
try:
  gpus = GPUtil.getGPUs()
except Exception:
  gpus = []
# Each field keeps its own fallback so one unreadable value does not hide the others.
try:
  info['gpu_total_ram'] = str(round(gpus[0].memoryTotal / 1024.0))+' Go'
except Exception:
  info['gpu_total_ram'] = 'unknown'
try:
  info['gpu_free_ram'] = str(round(gpus[0].memoryFree / 1024.0))+' Go'
except Exception:
  info['gpu_free_ram'] = 'unknown'
try:
  info['gpu_name'] = gpus[0].name
except Exception:
  info['gpu_name'] = 'unknown'
# One row per key, with a single 'Value' column.
DF_info = pd.DataFrame.from_dict(info, orient='index', columns=['Value'])

##Requirements

Note the packages currently imported into the working environment.

In [None]:
# Names of every module object currently bound in the global namespace.
# list(...) takes a snapshot so the namespace is not iterated while it may change.
module_names = {
    val.__name__
    for val in list(globals().values())
    if isinstance(val, types.ModuleType)
}

# Write one "name==version" line per module; sorted so the file is stable between runs.
filepath=os.path.join(operation_dir, project_name+'_'+operation_name+'_requirements.txt')
with open(filepath, 'w') as file_:
  for name in sorted(module_names):
    try:
      version = pkg_resources.get_distribution(name).version
    except Exception:
      # No version could be looked up for this module name: leave it empty.
      version = ""
    file_.write(f"{name}=={version}\n")

# Read the file back as a two-column table. The separator is a regex, written as a
# raw string so it contains no invalid escape sequences.
DF_requirements = pd.read_csv(filepath, sep=r"==| @ ", header=None, engine = 'python')
DF_requirements.columns=['pack.','ver.']

##Export dataframe to excel

Assuming the data is a pandas DataFrame, let's export it to Excel for easy visualization. Let's add the system information and requirements.

In [None]:
# Write the system information, module list and data to one workbook, one sheet each.
# The context manager saves and closes the file even if one of the exports raises.
with pd.ExcelWriter(os.path.join(operation_dir, project_name+'_'+operation_name+'.xlsx')) as writer:
  # sheet_name is passed by keyword: passing it positionally is deprecated in recent pandas.
  DF_info.to_excel(writer, sheet_name='System')
  DF_requirements.to_excel(writer, sheet_name='Module')
  DF.to_excel(writer, sheet_name='Data')

##Export dataframe to pickle

Assuming the data is a pandas DataFrame, let's export it with pickle for easy manipulation.

In [None]:
# Save DF as <operation_dir>/<project_name>_<operation_name>.pkl.
pd.to_pickle(
    DF,
    os.path.join(operation_dir, f"{project_name}_{operation_name}.pkl"),
)