# Cephノード構築：障害により削除したOSDを復帰させる
HDD障害などにより削除したOSDを復帰させる。<br>
destroyed状態になっているOSDを対象とする。（replaceで削除したOSD）

# パラメータ定義

## パラメータファイル設定

Cephクラスタの定義ファイルを指定するため、<br>
パラメータ定義格納ディレクトリを表示する。

In [None]:
!ls -p ~/notebooks/share/ | grep "/$"

In [None]:
!ls -p ~/notebooks/share/configuration

In [None]:
!ls -p ~/notebooks/share/configuration/ceph

In [None]:
import sys, os.path, importlib, json, yaml

config_file_name = "ceph_admin_vm.yml"
config_file_path = os.path.expanduser("~/notebooks/share/configuration/ceph/{}".format(config_file_name))
with open(config_file_path) as f:
    admin_params = yaml.safe_load(f)
admin_params

In [None]:
target_vm_name = 'xxx-ceph-admin1'
admin_patam = admin_params[target_vm_name]
admin_patam

In [None]:
ceph_admin_ip = admin_patam['network']['provisioning']['ip']

In [None]:
import sys, os.path, importlib, json, yaml

config_file_name = "xxx-openstack-parameter.yml"
config_file_path = os.path.expanduser("~/notebooks/share/configuration/{}".format(config_file_name))
with open(config_file_path) as f:
    params = yaml.safe_load(f)
params

In [None]:
repo_server = params['repository']['addresses']['service_operation']
repo_server

In [None]:
provisioning_vip = params['openstack']['provisioning_vip']
provisioning_vip

## CephノードとストレージセグメントIPアドレス対応表の読み込み

In [None]:
import os
import csv
import pandas as pd

instance_ip_csv_path =  os.environ['HOME'] + "/notebooks/share/configuration/ceph/ceph_instance_ip.csv"
instance_ip_list = []
instance_ip_map = {}

with open(instance_ip_csv_path, 'r') as path_csv:
    reader = csv.DictReader(path_csv)
    
    for record in reader:
        instance_ip_list.append(record)
        instance_ip_map[record['instance_name']] = record

pd.DataFrame(instance_ip_list)

## 作業用ディレクトリを作成する

In [None]:
import os,tempfile
temp_dir = tempfile.mkdtemp()
print (temp_dir)

## AnsibleのInventoryファイルを作業用ディレクトリへ作成する

In [None]:
import os

with open( os.path.join(temp_dir, "hosts"), 'w') as f:
    f.write('''[openstack_ctl]
{openstack_cmn}
'''.format(openstack_cmn = provisioning_vip))

hosts = temp_dir + "/hosts"
!cat $hosts

In [None]:
%env ANSIBLE_INVENTORY={hosts}

疎通確認

In [None]:
!ansible -m ping openstack_ctl

In [None]:
!ansible -a 'hostname' openstack_ctl

## OpenStack用OpenRCファイル設定

`xxxxxxx`プロジェクト用のOpenRCファイルを事前にダウンロードしておくこと。  

In [None]:
import os

user_creds = os.path.expanduser('~/.keys/xxxxxxx-openrc.sh')

assert os.path.exists(user_creds), '{} is not exist'.format(user_creds)

In [None]:
%env USER_CREDS={user_creds}

In [None]:
!ls -l {user_creds}

## openstackコマンド用ユーティリティ関数

In [None]:
def build_cmdline(param):
    """Render an option mapping as a string of ``--option`` arguments.

    Each entry of ``param`` is rendered according to the type of its value:

    * dict  -> one `` --name key=value`` per item
    * list  -> one `` --name item`` per element
    * bool  -> `` --name`` when true, nothing when false
    * other -> `` --name value``

    Every fragment starts with a space, so the result can be appended
    directly to a command line. An empty mapping yields an empty string.
    """
    fragments = []

    for option, setting in param.items():
        if isinstance(setting, dict):
            fragments.extend(
                ' --{} {}={}'.format(option, sub_key, sub_value)
                for sub_key, sub_value in setting.items()
            )
        elif isinstance(setting, list):
            fragments.extend(
                ' --{} {}'.format(option, entry) for entry in setting
            )
        elif isinstance(setting, bool):
            # A flag option: present only when switched on.
            if setting:
                fragments.append(' --{}'.format(option))
        else:
            fragments.append(' --{} {}'.format(option, setting))

    return ''.join(fragments)

In [None]:
import json

def run_openstack(cmd):
    """Run a shell command with the OpenRC file sourced and decode its output as JSON.

    The command is executed through IPython's ``!`` shell escape after
    sourcing the file named by the notebook-level ``user_creds`` variable.
    Both the command line and its raw output are printed.

    Args:
        cmd: Shell command line. Its output is passed to ``json.loads``, so
            it must be a JSON document (e.g. produced with ``-f json``).

    Returns:
        The decoded JSON value.
    """
    print('EXEC: {}'.format(cmd))
    # IPython shell escape: `out` receives the captured output lines.
    out = !source {user_creds} && {cmd}
    print('OUTPUT: {}'.format('\n'.join(out)))
    return json.loads('\n'.join(out))

テスト

In [None]:
run_openstack('openstack flavor list -f json' + build_cmdline({'all': True}))

## デプロイされたCephノード情報の取得

ストレージノードを特定するにあたり、ノード名称が　ceph-nodexxx （xxxの部分は、右寄せ0埋めの数字3桁）というルールに従っていることを前提としている。<br>
 例：ceph-node001<br>
 このノード名称ルールは、構築するCephクラスタ毎に変えるため、以下で定義する。

In [None]:
# 初期化対象のインスタンス名称を設定する
raise Exception('この先手作業が必要です')

In [None]:
import pandas as pd
import re

# 対象インスタンス名称マッチングルール
name_match_rule = r'ceph-xxx-node-[0-9]+'

## OpenStack Ironicからノード情報の取得

In [None]:
cmd = "openstack baremetal node list -f json --long"
nodes = run_openstack(cmd)

In [None]:
import pandas as pd
import re

for node in nodes:
    instance_uuid = node.get("Instance UUID", None)
    if instance_uuid is None:
        continue
        
    node["instance_name"] = node["Instance Info"]["display_name"]

pd.set_option('display.max_rows', None)
pd.DataFrame(nodes, columns=['Instance UUID', 'instance_name', 'UUID', 'Maintenance', 'Provisioning State', 'Power State', 'Name'])

In [None]:
cmd = "openstack server list -f json --long"
nova_server_map = run_openstack(cmd)

In [None]:
# Join the Nova server list with the Ironic node list: one node_info dict is
# produced for every server whose name matches name_match_rule and whose ID
# equals a node's "Instance UUID".
node_list = []
for server in nova_server_map:
    node_id = server['ID']
    # re.match only anchors at the start of the server name.
    if re.match(name_match_rule, server['Name']) is None:
        continue
    for node in nodes:
        if node['Instance UUID'] == node_id:
            # The dict is appended first and filled in afterwards.
            node_info = {}
            node_list.append(node_info)
            node_info['name'] = server['Name']
            node_info['node_name'] = node['Name']
            # Address from the CSV table loaded into instance_ip_map;
            # None when the server has no row (or no 'ipv4' column) there.
            instance_ip_info = instance_ip_map.get(server['Name'], None)
            if instance_ip_info is not None:
                node_info['instance_ip'] = instance_ip_info.get('ipv4', None)
            else:
                node_info['instance_ip'] = None
            address = server['Networks']
            prov_net = address['provisioning-net']
            # If several addresses are listed, the last one is kept.
            for addr in prov_net:
                node_info['prov_ip'] = addr

pd.DataFrame(node_list)

In [None]:
# nameをキーにしたノード情報生成
node_map = {}
for node in node_list:
    node_map[node['name']] = node

## 対象Cephノード情報の取得

初期化対象のインスタンス名称を設定する

In [None]:
# adminのインスタンス名称を設定する
raise Exception('この先手作業が必要です')

### adminノードの指定

In [None]:
bootstrap_node_name = 'ceph-xxx-node-001'

In [None]:
bootstrap_node = node_map.get(bootstrap_node_name, None)

assert (bootstrap_node is not None), "指定されたBootstrap nodeが存在しません。"
print(bootstrap_node)

# Inventoryファイルを生成する。

In [None]:
node_user='xxxxx'
cephadmin_user='cephadmin'
node_key='~/.ssh/id_ras'
proxy_user='xxxxx'
proxy_key='~/.ssh/id_ras'

In [None]:
bootstrap_ip = bootstrap_node['prov_ip']

In [None]:
import os

with open( os.path.join(temp_dir, "hosts"), 'w') as f:

    f.write('''[ceph_admin]
{ceph_admin_ip} ansible_user={proxy_user} ansible_ssh_private_key_file={proxy_key} ansible_python_interpreter=/usr/bin/python3
'''.format(ceph_admin_ip=ceph_admin_ip, proxy_user=proxy_user, proxy_key=proxy_key, bootstrap_ip=bootstrap_ip))

    f.write('''[bootstrap]
{node_ip} ansible_user={node_user} ansible_ssh_private_key_file={node_key} ansible_ssh_common_args='-o ControlMaster=auto -o StrictHostKeyChecking=no -o ControlPersist=30m -o ProxyCommand="ssh -W %h:%p -i {proxy_key} -q {proxy_user}@{ceph_admin_ip}"' ansible_python_interpreter=/usr/bin/python3
[ceph_nodes]
'''.format(ceph_admin_ip=ceph_admin_ip, node_ip=bootstrap_ip, node_key=node_key,
           node_user=node_user, proxy_user=proxy_user, proxy_key=proxy_key))
    
    for node in node_list:
        node_ip = node['prov_ip']
        f.write('''{node_ip} ansible_user={node_user} ansible_ssh_private_key_file={node_key} ansible_ssh_common_args='-o ControlMaster=auto -o StrictHostKeyChecking=no -o ControlPersist=30m -o ProxyCommand="ssh -W %h:%p -i {proxy_key} -q {proxy_user}@{ceph_admin_ip}"' ansible_python_interpreter=/usr/bin/python3
'''.format(ceph_admin_ip=ceph_admin_ip, node_ip=node_ip, node_key=node_key,
           node_user=node_user, proxy_user=proxy_user, proxy_key=proxy_key))

hosts = temp_dir + "/hosts"

!cat $hosts

In [None]:
%env ANSIBLE_INVENTORY={hosts}

## 疎通確認
以下コマンドを実行し、OpeHubからCephAdmin経由で、Cephノードに疎通することを確認する。

In [None]:
!ansible -b -m shell -a 'hostname' ceph_admin

In [None]:
for node in node_list:
    prov_ip = node['prov_ip']
    !ansible -b -m shell -a 'hostname' $prov_ip

In [None]:
!ansible -b -m shell -a 'hostname' ceph_nodes

In [None]:
!ansible -b -m shell -a 'hostname' bootstrap

# OSD情報の取得

## 全てのOSD情報取得

In [None]:
ret = !ansible -b -m shell -a 'cephadm shell -- ceph osd tree --format=json 2> /dev/null' bootstrap

In [None]:
print(ret)

In [None]:
osd_tree = json.loads(ret[2])
print(json.dumps(osd_tree, indent=2))

In [None]:
host_map = {}
osd_map = {}
osd_info_list = []

# Split the OSD tree entries into hosts (keyed by name) and OSDs (keyed by id).
# The tree is iterated directly instead of being bound to `nodes`, because
# that name already holds the Ironic node list used by earlier cells and
# would be clobbered if those cells were re-run afterwards.
for osd_tree_info in osd_tree['nodes']:
    info_type = osd_tree_info.get('type')
    if info_type == "host":
        host_map[osd_tree_info['name']] = osd_tree_info
    elif info_type == "osd":
        osd_map[osd_tree_info['id']] = osd_tree_info

# Tag every OSD listed under a host with that host's name and collect it.
for key, val in host_map.items():
    for osd_id in val.get('children', []):
        osd = osd_map.get(osd_id)
        if osd is None:
            # Child id without a matching OSD entry: nothing to record.
            continue
        osd['host_name'] = key
        # 'pool_weights' is not used afterwards; the default avoids a
        # KeyError when the entry does not carry that key.
        osd.pop('pool_weights', None)
        osd_info_list.append(osd)


In [None]:
pd.DataFrame(osd_info_list)

## 対象OSDの詳細情報取得

In [None]:
ret = !ansible -b -m shell -a "cephadm shell -- ceph osd metadata --format=json 2> /dev/null" bootstrap

In [None]:
osd_metadata_list = json.loads(ret[2])
print(json.dumps(osd_metadata_list, indent=2))

In [None]:
# Copy the DB and data (bdev) partition paths from each OSD's metadata onto
# the matching record in osd_map (the same dict objects as in osd_info_list).
for osd_metadata in osd_metadata_list:
    osd = osd_map.get(osd_metadata['id'])
    if osd is None:
        # Metadata for an OSD id that is not in the OSD tree: skip it rather
        # than failing on a None subscript.
        continue
    # .get() keeps the loop going when a metadata entry lacks one of the
    # paths; the record then carries None for that path.
    osd['bluefs_db_partition_path'] = osd_metadata.get('bluefs_db_partition_path')
    osd['bluestore_bdev_partition_path'] = osd_metadata.get('bluestore_bdev_partition_path')


In [None]:
pd.DataFrame(osd_info_list)

# LVMのデバイスマップ情報取得

## LVM情報取得関数定義

In [None]:
import re

def make_pv_info_list(ret):
    """Parse ``pvdisplay`` output lines into a list of physical-volume records.

    Args:
        ret: Sequence of output lines. The first line is skipped (presumably
            the ansible result header that precedes the command output).
            The sequence is not modified.

    Returns:
        A list of dicts with the keys ``pv_name``, ``vg_name`` and
        ``pv_size``. Each value is the first whitespace-separated token
        following the label, or ``None`` when nothing follows it (e.g. the
        ``VG Name`` of a PV that belongs to no volume group). A record is
        appended when its ``PV Size`` line is reached. Empty input yields an
        empty list.
    """
    labels = (
        ('PV Name', 'pv_name'),
        ('VG Name', 'vg_name'),
        ('PV Size', 'pv_size'),
    )
    pv_info_list = []
    pv_info = None

    # Slice rather than ret.pop(0): skips the first line without mutating the
    # caller's list and without raising on empty output.
    for line in ret[1:]:
        if 'Physical volume' in line:
            # Section header: start a new record.
            pv_info = {}
            continue
        if pv_info is None:
            # Ignore anything that precedes the first section header.
            continue
        for label, key in labels:
            if label in line:
                fields = line.replace(label, '').split()
                pv_info[key] = fields[0] if fields else None
                if key == 'pv_size':
                    # 'PV Size' is the last field collected for a record.
                    pv_info_list.append(pv_info)
                break

    return pv_info_list

def make_lv_info_list(ret, pv_info_map):
    """Parse ``lvdisplay`` output lines into a list of logical-volume records.

    Args:
        ret: Iterable of output lines.
        pv_info_map: Mapping of VG name to PV name, used to fill ``pv_name``
            (``None`` when the VG is not in the mapping).

    Returns:
        A list of dicts with the keys ``lv_path``, ``lv_name``, ``vg_name``,
        ``pv_name``, ``block_dev`` and ``dev_name``. ``dev_name`` is
        ``"dm-<n>"`` where ``<n>`` is the part after the colon of the
        ``Block device`` value. A record is appended only when its
        ``Block device`` line is reached.
    """
    def first_token(label, text):
        # First whitespace-separated token once the label is removed.
        return re.sub(label, '', text).split()[0]

    records = []
    current = None

    for text in ret:
        if re.search('Logical volume', text):
            # Section header: start a new record.
            current = {}
        elif re.search('LV Path', text):
            current['lv_path'] = first_token('LV Path', text)
        elif re.search('LV Name', text):
            current['lv_name'] = first_token('LV Name', text)
        elif re.search('VG Name', text):
            volume_group = first_token('VG Name', text)
            current['vg_name'] = volume_group
            current['pv_name'] = pv_info_map.get(volume_group)
        elif re.search('Block device', text):
            device_numbers = first_token('Block device', text)
            current['block_dev'] = device_numbers
            current['dev_name'] = "dm-{}".format(device_numbers.split(':')[1])
            records.append(current)

    return records
            

## PV・LV情報取得

In [None]:
# Per-host results, keyed by the node's 'name'.
pv_info_list_map = {}
lv_info_list_map = {}

for node in node_list:
    # Collect PV info by running pvdisplay on the node.
    host_name = node['name']
    prov_ip = node['prov_ip']
    ret = !ansible -i $hosts -b -m shell -a 'pvdisplay' $prov_ip
    pv_info_list = make_pv_info_list(ret)
    pv_info_list_map[host_name] = pv_info_list

    # Map VG name -> PV name; when a VG appears on several PVs, the last PV
    # listed is the one kept.
    pv_info_map = {}
    for pv_info in pv_info_list:
        vg_name = pv_info['vg_name']
        pv_info_map[vg_name] = pv_info['pv_name']    
    
    # Collect LV info by running lvdisplay on the node.
    ret = !ansible -i $hosts -b -m shell -a 'lvdisplay' $prov_ip
    lv_info_list = make_lv_info_list(ret,pv_info_map)
    lv_info_list_map[host_name] = lv_info_list


In [None]:
for key, pv_info_list in pv_info_list_map.items():
    ret = pd.DataFrame(pv_info_list)
    print(key)
    print(ret)


In [None]:
for key, lv_info_list in lv_info_list_map.items():
    ret = pd.DataFrame(lv_info_list)
    print(key)
    print(ret)

## OSD情報リストと対象ディスクとの関連付け

In [None]:
# For every OSD, resolve its data (bdev) and DB partition paths to the LVM
# logical volume behind them and copy that LV's path / name / VG / PV onto the
# OSD record as <prefix>_lv_path, <prefix>_lv_name, <prefix>_vg_name and
# <prefix>_pv_name.  All four are set to None when no LV can be matched.
lv_fields = ('lv_path', 'lv_name', 'vg_name', 'pv_name')
path_keys = (
    ('bdev', 'bluestore_bdev_partition_path'),
    ('db', 'bluefs_db_partition_path'),
)

for osd_info in osd_info_list:
    # Index this host's LVs by device name ("dm-N").  A host without LV data
    # gives an empty index instead of a KeyError.
    lv_by_dev = {
        lv_info['dev_name']: lv_info
        for lv_info in lv_info_list_map.get(osd_info['host_name'], [])
    }

    for prefix, path_key in path_keys:
        # The device name is the third "/"-separated element of the path
        # (presumably paths of the form "/dev/dm-N").  A missing or shorter
        # path simply matches no LV.
        partition_path = osd_info.get(path_key)
        parts = partition_path.split('/') if partition_path else []
        target_lv_info = lv_by_dev.get(parts[2]) if len(parts) > 2 else None

        for field in lv_fields:
            osd_info['{}_{}'.format(prefix, field)] = (
                target_lv_info[field] if target_lv_info is not None else None
            )
    

In [None]:
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', len(osd_info_list))
pd.DataFrame(osd_info_list)

# 削除OSDの復帰

## Destroyed OSD情報取得

In [None]:
# Keep only the OSD records whose status is 'destroyed'.
destroyed_osd_info_list = [
    osd_info for osd_info in osd_info_list
    if osd_info['status'] == 'destroyed'
]

In [None]:
pd.DataFrame(destroyed_osd_info_list)

## 復帰対象OSD指定

In [None]:
# 復帰対象OSDのIDを指定
raise Exception('OSDの復帰は1つづつ行う。そのため、復帰対象OSDが複数存在していることを想定し、手動で対象OSD IDを指定する。')

In [None]:
recovery_osd_id = 60

## 復帰先LVチェック

In [None]:
# Look up the record of the OSD selected for recovery among the destroyed OSDs.
recovery_osd_info = next(
    (osd_info for osd_info in destroyed_osd_info_list
     if osd_info['id'] == recovery_osd_id),
    None,
)
print(recovery_osd_info)

# Stop here with a clear message instead of failing later on a None subscript.
assert (recovery_osd_info is not None), \
    "指定されたOSD ID({})はdestroyed状態のOSDに存在しません。".format(recovery_osd_id)

In [None]:
bdev_lv_path = recovery_osd_info['bdev_lv_path']
db_lv_path = recovery_osd_info['db_lv_path']
host_name = recovery_osd_info['host_name']
print("host={} osd.{} db_lv_path={} bdev_lv_path={}".format(host_name, recovery_osd_id,db_lv_path,bdev_lv_path))

assert (db_lv_path is not None), "復帰先のdb_lv_pathが存在しません。復帰するOSDのLVが復旧していないため、復帰できません。"
assert (bdev_lv_path is not None), "復帰先のbdev_lv_pathが存在しません。復帰するOSDのLVが復旧していないため、復帰できません。"


復帰するOSDのLVが存在しないケースでは、HDDの交換とLVの復旧が完了していないことを示している。<br>
この場合、HDDとLVの復旧を行ったのちに再度実行するか、未使用HDDとLVを指定して復旧させる。<br>

未使用HDDとLVを指定して復旧させる場合は、以下のNotebookを用いて、未使用LVを使用してOSDを構築する。<br>
yyyy-mm-dd_nn_D15_Ceph_Create_OSD_Select_DB_DATA_LV_[NODE_NAME] <br>

復帰OSDとして、削除したOSDと同じノード、同じLVを用いる必要はない。

## 復帰先LV再構築

### DB LV再構築

In [None]:
# Count the logical volumes on the target host that belong to VG 'vg_db'.
# NOTE(review): the VG name is hard-coded; confirm it matches the DB VG of the
# OSD being recovered.
lv_info_list = lv_info_list_map[host_name]
db_lv_num = sum(1 for lv_info in lv_info_list if lv_info['vg_name'] == 'vg_db')

print(db_lv_num)

In [None]:
node = node_map[host_name]
prov_ip = node['prov_ip']
!ansible -i $hosts -b -m shell -a 'hostname' $prov_ip

#### DB LV 削除

In [None]:
db_vg_name = recovery_osd_info['db_vg_name']
db_lv_name = recovery_osd_info['db_lv_name']
print ("db_vg_name={} db_lv_name={}".format(db_vg_name,db_lv_name))

In [None]:
!ansible -b -m shell -a "wipefs -an /dev/$db_vg_name/$db_lv_name" $prov_ip

In [None]:
!ansible -b -m shell -a "wipefs -a /dev/$db_vg_name/$db_lv_name" $prov_ip

In [None]:
!ansible -b -m shell -a "wipefs -an /dev/$db_vg_name/$db_lv_name" $prov_ip

In [None]:
!ansible -i $hosts -b -m shell -a "lvchange --activate n $db_vg_name/$db_lv_name" $prov_ip

In [None]:
!ansible -b -m shell -a "lvremove /dev/$db_vg_name/$db_lv_name" $prov_ip

In [None]:
!ansible -b -m shell -a "lvdisplay $db_vg_name" $prov_ip

#### DB LV 生成

In [None]:
import math
lv_size = 1.0 / db_lv_num * 100.0
lv_size = math.floor(lv_size)
print (lv_size)

In [None]:
#elems = db_lv_name.split('_')
#replace_cnt = None
#if len(elems) < 3:
#    replace_cnt = 1
#elif len(elems) == 3:
#    replace_cnt = int(elems[2]) + 1
#
#next_db_lv_name = "{}_{}_{}".format(elems[0],elems[1],replace_cnt)
next_db_lv_name = db_lv_name
print(next_db_lv_name)

In [None]:
lv_size_perc = "{}%VG".format(lv_size)
!ansible -i $hosts shell -a "lvcreate -l $lv_size_perc -n $next_db_lv_name $db_vg_name" $prov_ip

In [None]:
!ansible -i $hosts shell -a "lvdisplay $db_vg_name" $prov_ip

### DATA LV 再構築

#### DATA LV 削除

In [None]:
bdev_vg_name = recovery_osd_info['bdev_vg_name']
bdev_lv_name = recovery_osd_info['bdev_lv_name']
print ("bdev_vg_name={} bdev_lv_name={}".format(bdev_vg_name,bdev_lv_name))

In [None]:
!ansible -b -m shell -a "wipefs -an /dev/$bdev_vg_name/$bdev_lv_name" $prov_ip

In [None]:
!ansible -b -m shell -a "wipefs -a /dev/$bdev_vg_name/$bdev_lv_name" $prov_ip

In [None]:
!ansible -b -m shell -a "wipefs -an /dev/$bdev_vg_name/$bdev_lv_name" $prov_ip

In [None]:
!ansible -b -m shell -a "lvchange --activate n $bdev_vg_name/$bdev_lv_name" $prov_ip

In [None]:
!ansible -b -m shell -a "lvremove /dev/$bdev_vg_name/$bdev_lv_name" $prov_ip

In [None]:
!ansible -b -m shell -a "lvdisplay $bdev_vg_name" $prov_ip

#### DATA LV 生成

In [None]:
#elems = bdev_lv_name.split('_')
#replace_cnt = None
#if len(elems) < 4:
#    replace_cnt = 1
#elif len(elems) == 4:
#    replace_cnt = int(elems[3]) + 1
#
#next_bdev_lv_name = "{}_{}_{}_{}".format(elems[0],elems[1],elems[2],replace_cnt)

next_bdev_lv_name = bdev_lv_name
print(next_bdev_lv_name)

In [None]:
lv_size_perc = "{}%VG".format(lv_size)
!ansible -b -m shell -a "lvcreate -l 100%FREE -n $next_bdev_lv_name $bdev_vg_name" $prov_ip

In [None]:
!ansible -b -m shell -a "lvdisplay $bdev_vg_name" $prov_ip

### LVパス生成

In [None]:
db_lv_path = "/dev/{}/{}".format(db_vg_name,next_db_lv_name)
print(db_lv_path)

In [None]:
bdev_lv_path = "/dev/{}/{}".format(bdev_vg_name,next_bdev_lv_name)
print(bdev_lv_path)

## OSD構築前のスクラビング無効化

In [None]:
!ansible -b -m shell -a "cephadm shell -- ceph osd set noscrub" bootstrap

In [None]:
!ansible -b -m shell -a "cephadm shell -- ceph osd set nodeep-scrub" bootstrap

## OSD 復帰

In [None]:
assert (db_lv_path is not None and bdev_lv_path is not None and host_name is not None), "パラメータの不備"

In [None]:
print("host_name={} db_dev={} data_dev={}".format(host_name,db_lv_path,bdev_lv_path))

In [None]:
!ansible -b -m shell -a "cephadm shell -- ceph orch daemon add osd $host_name:data_devices=$bdev_lv_path,db_devices=$db_lv_path" bootstrap

In [None]:
!ansible -b -m shell -a "cephadm shell -- ceph osd tree" bootstrap

In [None]:
!ansible -b -m shell -a 'cephadm shell -- ceph orch ls' bootstrap

In [None]:
!ansible -b -m shell -a 'cephadm shell -- ceph -s' bootstrap

## OSD構築後のスクラビング有効化

In [None]:
!ansible -b -m shell -a "cephadm shell -- ceph osd unset noscrub" bootstrap

In [None]:
!ansible -b -m shell -a "cephadm shell -- ceph osd unset nodeep-scrub" bootstrap

In [None]:
!ansible -b -m shell -a "cephadm shell -- ceph -s" bootstrap

In [None]:
!ansible -b -m shell -a "cephadm shell -- ceph osd in osd.$recovery_osd_id" bootstrap

# 後始末

一時ディレクトリを削除する。

In [None]:
!rm -fr $temp_dir