In [25]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os 
%matplotlib inline
from scipy.cluster.hierarchy import linkage, fcluster


In [65]:
def cluster_values(df, num_clusters, cluster_value):
    """
    Label the rows of a DataFrame by 1-D hierarchical clustering of one column.

    The values of ``cluster_value`` are clustered with Ward linkage and cut
    into flat clusters with the ``maxclust`` criterion, so at most
    ``num_clusters`` distinct labels are produced.

    The input DataFrame is NOT modified; the labels are written to a copy.

    Parameters:
    df (pd.DataFrame): The DataFrame containing the data to be clustered.
    num_clusters (int): The maximum number of clusters to form.
    cluster_value (str): The name of the column in df to be clustered.

    Returns:
    pd.DataFrame: A copy of df with an extra integer column named
        'Cluster<cluster_value>' (e.g. 'Clusterz'), sorted by ascending
        ``cluster_value`` and re-indexed from 0.
    """
    label_column = 'Cluster{}'.format(cluster_value)
    values = df[cluster_value].to_numpy().reshape(-1, 1)

    if len(values) < 2:
        # linkage() rejects fewer than two observations; zero or one row
        # trivially forms (at most) a single cluster, labelled 1 like fcluster.
        labels = np.ones(len(values), dtype=np.int32)
    else:
        # Perform hierarchical clustering
        C = linkage(values, method='ward')
        # Determine clusters using the number of clusters
        labels = fcluster(C, num_clusters, criterion='maxclust')

    # Work on a copy so the caller's frame does not silently gain a column.
    labelled = df.copy()
    labelled[label_column] = labels

    # Sort by the clustered value for better visualization
    return labelled.sort_values(by=[cluster_value]).reset_index(drop=True)


In [68]:
def cluster_values2(df, threshold, cluster_value):
    """
    Label the rows of a DataFrame by 1-D hierarchical clustering of one column,
    cutting the tree at a distance threshold instead of a cluster count.

    The values of ``cluster_value`` are clustered with Ward linkage and cut
    into flat clusters with the ``distance`` criterion, so the number of
    clusters is determined by ``threshold`` rather than fixed in advance.

    The input DataFrame is NOT modified; the labels are written to a copy.

    Parameters:
    df (pd.DataFrame): The DataFrame containing the data to be clustered.
    threshold (float): Distance threshold passed to fcluster.
    cluster_value (str): The name of the column in df to be clustered.

    Returns:
    pd.DataFrame: A copy of df with an extra integer column named
        'Cluster<cluster_value>' (e.g. 'Clustery'), sorted by ascending
        ``cluster_value`` and re-indexed from 0.
    """
    label_column = 'Cluster{}'.format(cluster_value)
    values = df[cluster_value].to_numpy().reshape(-1, 1)

    if len(values) < 2:
        # linkage() rejects fewer than two observations; zero or one row
        # trivially forms (at most) a single cluster, labelled 1 like fcluster.
        labels = np.ones(len(values), dtype=np.int32)
    else:
        # Perform hierarchical clustering
        C = linkage(values, method='ward')
        # Determine clusters using the distance threshold
        labels = fcluster(C, threshold, criterion='distance')

    # Work on a copy so the caller's frame does not silently gain a column.
    labelled = df.copy()
    labelled[label_column] = labels

    # Sort by the clustered value for better visualization
    return labelled.sort_values(by=[cluster_value]).reset_index(drop=True)

In [79]:
def oct_vacancy_type(n1, n2):
    """
    Classify a site from the particle types of its two neighbors.

    Parameters:
    n1 (int): Neighbor particle 1's type.
    n2 (int): Neighbor particle 2's type.

    Returns:
    vacancy_type (int):
        0 - Occupied                 (the two types sum to 9),
        1 - No neighbor vacancy      (both types are 5),
        2 - One neighbor vacancy     (every other combination),
        3 - Two neighbor vacancies   (both types are 3)
    """
    # Identical neighbors: only the 5/5 and 3/3 pairs have a dedicated code.
    if n1 == n2:
        if n2 == 5:
            return 1
        if n2 == 3:
            return 3

    # Checked after the identical-pair cases, exactly as in the if/elif order.
    if n1 + n2 == 9:
        return 0

    return 2


In [91]:
# Directory and file name of the structure file to analyse.
path1='/home/heruhe/Desktop/Ga2o3/relaxed_dataframes/'
# Number of atom rows to read from the file.
natom=80
file1=path1+'beta_orth_cell'
# Upper x bound of the cell; presumably the periodic box length along x -- TODO confirm.
x_max=24.2141442709
# Read the final positions of all atoms: skip the first 16 lines of the file,
# then parse `natom` space-separated rows that carry no header line.
df=pd.read_csv(file1, skiprows = 16, nrows=natom,sep=" ",header=None)
df.columns=['Particle Identifier','Particle Type','x','y','z']
# Atoms that will be displaced (particle type 1, tetrahedral Ga per the author).
# Taken BEFORE clustering, so this frame keeps the original row order and has
# no 'Clusterz' column.
dfGa1=df[df['Particle Type']==1].reset_index(drop=True)
# Group the atoms into layers along z: adds a 'Clusterz' label column and
# returns the frame sorted by z.
num_clusters = 4
df = cluster_values(df, num_clusters, 'z')
print(dfGa1)

    

    Particle Identifier  Particle Type          x         y         z
0                     3              1   1.888766  0.500000  5.207886
1                     4              1   5.764770  2.042860  0.798275
2                     7              1   7.942302  2.042860  3.734663
3                     8              1  11.818307  0.500000  5.204437
4                    23              1   1.888766  3.585721  5.207886
5                    24              1   5.764770  5.128581  0.798275
6                    27              1   7.942302  5.128581  3.734663
7                    28              1  11.818307  3.585721  5.204437
8                    43              1  13.995838  0.500000  2.261440
9                    44              1  17.871843  2.042860  3.731214
10                   47              1  20.049374  2.042860  0.788216
11                   48              1  23.925379  0.500000  2.257990
12                   63              1  13.995838  3.585721  2.261440
13                  

In [96]:

# Enumerate candidate sites halfway between x-adjacent particles of type > 2,
# one z layer and one y row at a time.
vacancy = []
x_max = 24.2141442709  # presumably the periodic cell length along x -- TODO confirm

for cluster in df['Clusterz'].unique():
    # Rows of this z layer, then only its particles with type > 2.
    # Explicit copies: cluster_values2 may write a column into its argument.
    df_layer = df.loc[df['Clusterz'] == cluster].copy()
    df_layerO = df_layer.loc[df_layer['Particle Type'] > 2].copy()
    # Label those particles by y (distance threshold 0.2) -> 'Clustery'.
    df_layerO = cluster_values2(df_layerO, 0.2, 'y')

    for cluster2 in df_layerO['Clustery'].unique():
        in_row = df_layerO['Clustery'] == cluster2
        df_layerO_yz = df_layerO.loc[in_row].sort_values(by=['x']).reset_index(drop=True)

        # Every site generated for this row uses the row's mean y and z.
        y = df_layerO_yz['y'].mean()
        z = df_layerO_yz['z'].mean()

        row_types = df_layerO_yz['Particle Type'].to_numpy()
        row_x = df_layerO_yz['x'].to_numpy()

        # Sites between consecutive particles along x.
        for n1, n2, x_left, x_right in zip(row_types[:-1], row_types[1:], row_x[:-1], row_x[1:]):
            middle_value_x = (x_left + x_right) / 2
            vacancy_type = oct_vacancy_type(n1, n2)
            vacancy.append({'x': middle_value_x, 'y': y, 'z': z, 'vacancy_type': vacancy_type, 'z_layer': cluster})

        # Wrap-around site between the last particle and the first one shifted
        # by x_max; folded back into the cell when it lands beyond x_max.
        n1 = row_types[0]
        n2 = row_types[-1]
        middle_value_x = (row_x[0] + x_max + row_x[-1]) / 2
        if middle_value_x > x_max:
            middle_value_x -= x_max
        vacancy_type = oct_vacancy_type(n1, n2)
        vacancy.append({'x': middle_value_x, 'y': y, 'z': z, 'vacancy_type': vacancy_type, 'z_layer': cluster})

# One row per candidate site; keep only those not classified as type 0 (occupied).
df_vacancy = pd.DataFrame(vacancy)
df_vacancy_Ga2 = df_vacancy[df_vacancy['vacancy_type'] != 0]
df_vacancy_Ga2

Unnamed: 0,x,y,z,vacancy_type,z_layer
0,4.793271,0.5,0.793246,2,2
2,12.907072,0.5,0.793246,1,2
4,21.020873,0.5,0.793246,2,2
5,0.8,0.5,0.793246,3,2
6,4.793271,3.585721,0.793246,2,2
8,12.907072,3.585721,0.793246,1,2
10,21.020873,3.585721,0.793246,2,2
11,0.8,3.585721,0.793246,3,2
13,6.853536,2.04286,2.261966,1,1
15,14.967337,2.04286,2.261966,2,1


In [121]:
# Displace a randomly chosen Ga1 atom to a randomly chosen vacancy of a given type:

def displacement_Ga1(dfGa1, df_vacancy_Ga2, vacancy_type, min_distance=9, random_state=None):
    """
    Pick a random Ga1 particle and a random vacancy of the requested type that
    are more than ``min_distance`` apart, and return the displacement vector
    from the particle to the vacancy.

    All (Ga1, vacancy) pairs are enumerated and one qualifying pair is drawn
    uniformly at random. This is the same distribution as repeatedly sampling
    a pair until one qualifies, but it cannot loop forever when no pair
    qualifies.

    NOTE(review): distances are plain Cartesian differences of the stored
    coordinates; no periodic minimum-image wrap is applied -- confirm this is
    the intended metric.

    Parameters:
    dfGa1 (pd.DataFrame): Ga1 particle data with 'x', 'y', 'z' columns.
    df_vacancy_Ga2 (pd.DataFrame): Vacancy data with 'x', 'y', 'z' and
        'vacancy_type' columns.
    vacancy_type (int): Type of vacancy to consider.
    min_distance (float): The selected pair must be strictly farther apart
        than this value (default 9, in the units of the coordinates).
    random_state (int or None): Seed for a reproducible choice. When None the
        global NumPy random state is used.

    Returns:
    tuple: (Ga1, dx, dy, dz)
           Ga1 - one-row pd.DataFrame holding the selected Ga1 particle
                 (original index label preserved).
           dx - Displacement along the x-axis (vacancy minus particle).
           dy - Displacement along the y-axis.
           dz - Displacement along the z-axis.

    Raises:
    ValueError: If there are no Ga1 particles, no vacancies of the requested
        type, or no pair farther apart than ``min_distance``.
    """
    candidates = df_vacancy_Ga2[df_vacancy_Ga2['vacancy_type'] == vacancy_type]
    if len(dfGa1) == 0:
        raise ValueError("dfGa1 contains no particles to displace")
    if len(candidates) == 0:
        raise ValueError("no vacancy of type {} available".format(vacancy_type))

    axes = ['x', 'y', 'z']
    ga_xyz = dfGa1[axes].to_numpy(dtype=float)
    vac_xyz = candidates[axes].to_numpy(dtype=float)

    # deltas[i, j] is the vector from Ga1 row i to vacancy row j.
    deltas = vac_xyz[np.newaxis, :, :] - ga_xyz[:, np.newaxis, :]
    distances = np.sqrt(deltas[..., 0]**2 + deltas[..., 1]**2 + deltas[..., 2]**2)

    ga_rows, vac_rows = np.nonzero(distances > min_distance)
    if ga_rows.size == 0:
        raise ValueError(
            "no Ga1/vacancy pair of type {} is farther apart than {}".format(
                vacancy_type, min_distance))

    # Use the global NumPy state unless a seed is given, so np.random.seed()
    # set elsewhere in the notebook still governs the draw.
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    pick = rng.randint(ga_rows.size)
    ga_row, vac_row = ga_rows[pick], vac_rows[pick]

    Ga1 = dfGa1.iloc[[ga_row]]
    dx, dy, dz = deltas[ga_row, vac_row]

    return Ga1, dx, dy, dz

# Example usage: draw one Ga1 particle and a type-1 vacancy, then show the
# selected row and the displacement components.
Ga1, dx, dy, dz = displacement_Ga1(dfGa1, df_vacancy_Ga2, vacancy_type=1)
print("Ga1:", Ga1, sep="\n")
print(f"dx: {dx}, dy: {dy}, dz: {dz}")


Ga1:
   Particle Identifier  Particle Type          x        y         z
9                   44              1  17.871843  2.04286  3.731214
dx: -17.07184258255, dy: 1.5428604666999997, dz: 0.0017246838833333555
