In [None]:
!rm -r rruff_files

In [None]:
import requests
from bs4 import BeautifulSoup
import time
import os
from tqdm import tqdm

class RRUFFScraper:
    """Download powder-diffraction XY files for minerals listed on rruff.info.

    The scraper first asks the site's mineral lookup page for every mineral
    whose name starts with a given letter, then visits each mineral page and
    saves the "X-ray Data (XY - Processed)" / "X-ray Data (XY - RAW)" files
    linked from its POWDER DIFFRACTION table.

    Args:
        timeout: Seconds to wait for each HTTP request before giving up.
            Without a timeout a stalled connection would block the whole
            scrape indefinitely.
    """

    def __init__(self, timeout=30):
        self.base_url = "https://rruff.info"
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        self.timeout = timeout

    def get_mineral_links(self, letter):
        """Return ``(mineral_name, mineral_id)`` pairs for one starting letter.

        Args:
            letter: Single letter used to query the lookup page.

        Returns:
            A list of ``(mineral_name, mineral_id)`` tuples taken from the
            quoted arguments of each ``SubmitWin(...)`` link on the page.

        Raises:
            requests.exceptions.RequestException: If the lookup request fails.
        """
        # NOTE(review): ``id_field=(letter)`` is sent literally, not
        # interpolated; kept as in the original -- confirm this is intended.
        url = f"{self.base_url}/index.php/r=lookup_minerals/letter={letter}/calling_form=frm_sample_search/name_field=txt_mineral/id_field=(letter)"
        response = requests.get(url, headers=self.headers, timeout=self.timeout)
        soup = BeautifulSoup(response.text, 'html.parser')

        links = []
        for strong_tag in soup.find_all('strong'):
            a_tag = strong_tag.find('a')
            if not a_tag:
                continue
            href = a_tag.get('href', '')
            if 'SubmitWin' not in href:
                continue
            # The href is expected to carry two single-quoted arguments:
            # name first, id second.
            parts = href.split("'")
            if len(parts) < 4:
                # Unexpected href shape: skip it instead of raising IndexError.
                continue
            links.append((parts[1], parts[3]))
        return links

    @staticmethod
    def _find_powder_table(soup):
        """Return the first table whose first header mentions POWDER DIFFRACTION."""
        for table in soup.find_all('table'):
            th = table.find('th')
            if th and "POWDER DIFFRACTION" in th.text:
                return table
        return None

    @staticmethod
    def _collect_xy_links(powder_table, page_url):
        """Map absolute download URL -> file label for the XY processed/raw links."""
        from urllib.parse import urljoin

        download_links = {}
        for tr in powder_table.find_all('tr'):
            for a in tr.find_all('a', href=True):
                if "X-ray Data (XY - Processed)" in a.text or "X-ray Data (XY - RAW)" in a.text:
                    # urljoin leaves absolute links alone and resolves
                    # relative ones against the page they came from.
                    xray_data_link = urljoin(page_url, a['href'])
                    file_info = a.text.strip().replace(" ", "_").replace("(", "").replace(")", "").lower()
                    download_links[xray_data_link] = file_info
        return download_links

    def download_xray_data(self, mineral_info, output_dir='rruff_files'):
        """Download the processed and raw XY X-ray data files of one mineral.

        Args:
            mineral_info: ``(mineral_name, mineral_id)`` tuple as returned by
                :meth:`get_mineral_links`.
            output_dir: Directory the files are written to; created on demand.

        Returns:
            True if at least one file was found and all found files were
            saved, False if nothing was found or any error occurred (the
            error is printed, not raised).
        """
        mineral_name, mineral_id = mineral_info
        url = f"{self.base_url}/{mineral_name}/R{mineral_id}"

        # Bound before the try so the RequestException handler can tell
        # whether the page was parsed before the failure happened.
        soup = None
        try:
            response = requests.get(url, headers=self.headers, timeout=self.timeout)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')

            powder_table = self._find_powder_table(soup)
            if powder_table is None:
                return False

            download_links = self._collect_xy_links(powder_table, url)
            if not download_links:
                return False

            os.makedirs(output_dir, exist_ok=True)

            for xray_data_link, file_info in download_links.items():
                filename = f"{output_dir}/{mineral_name}_{file_info}.txt"

                file_response = requests.get(
                    xray_data_link, headers=self.headers, timeout=self.timeout
                )
                # Do not save an HTTP error page as if it were XRD data.
                file_response.raise_for_status()
                with open(filename, 'wb') as f:
                    f.write(file_response.content)

            return True

        except requests.exceptions.RequestException as e:
            print(f"Error during requests to {url}: {str(e)}")
            if soup is not None:
                # Only available when the failure happened after the mineral
                # page itself had been fetched and parsed.
                print(soup.prettify())
            return False
        except Exception as e:
            print(f"Error processing {mineral_info}: {str(e)}")
            return False

    def scrape_all(self, start_letter='a', end_letter='z', delay=2):
        """Download RRUFF files for every mineral from start_letter to end_letter.

        Args:
            start_letter: First letter (inclusive, case-insensitive).
            end_letter: Last letter (inclusive, case-insensitive).
            delay: Seconds to sleep after each mineral.
        """
        success_count = 0

        for code in range(ord(start_letter.lower()), ord(end_letter.lower()) + 1):
            letter = chr(code)

            mineral_links = self.get_mineral_links(letter)

            # One progress bar per letter, advanced once per mineral.
            with tqdm(total=len(mineral_links), desc=f"Processing letter {letter}") as pbar:
                for mineral_info in mineral_links:
                    if self.download_xray_data(mineral_info):
                        success_count += 1
                    time.sleep(delay)
                    pbar.update(1)

        print(f"\nDownload completed! Successfully downloaded {success_count} X-ray Data files.")

if __name__ == "__main__":
    # Demo run: fetch minerals starting with 'a' through 'b', pausing 10 ms
    # between minerals.
    scraper = RRUFFScraper()
    scraper.scrape_all(start_letter='a', end_letter='b', delay=0.01)

Processing letter a: 100%|██████████| 231/231 [02:25<00:00,  1.59it/s]
Processing letter b:  53%|█████▎    | 131/248 [01:29<01:19,  1.46it/s]


KeyboardInterrupt: 

In [None]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
import re
import numpy as np

class XRDDataset(Dataset):
    """Dataset of XRD patterns read from RRUFF ``.txt`` files in a directory.

    Each item is a dict with the padded/truncated ``xrd_data`` tensor of
    shape ``(max_length, 2)`` (angle, intensity), plus the ``name``,
    ``crystal_system`` and ``file_type`` strings parsed from the file.
    """

    # A single numeric token: optional sign, digits with optional fraction
    # (or a bare fraction), optional exponent.
    _NUMBER = r"[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?"
    # One "angle, intensity" data row. Unlike a float,float / int,int-only
    # pattern this also accepts negative values (common after background
    # subtraction), mixed int/float pairs and scientific notation, so the
    # first data rows of such files are no longer skipped.
    _DATA_LINE_RE = re.compile(rf"^{_NUMBER}\s*,\s*{_NUMBER}$")

    def __init__(self, directory, max_length=None):
        """
        Args:
            directory: Path to the directory containing the RRUFF files.
            max_length (int, optional): Maximum length to pad/truncate XRD data.
                If None, uses the longest sequence in the dataset.
        """
        self.directory = directory
        self.filepaths = []
        self.max_length = max_length

        longest = 0
        # Sort by name so that index -> file is reproducible between runs;
        # os.scandir yields entries in arbitrary order.
        for entry in sorted(os.scandir(directory), key=lambda e: e.name):
            if not (entry.is_file() and entry.name.endswith('.txt')):
                continue
            if self.max_length is None:
                # The file has to be parsed once to learn its length.
                try:
                    longest = max(longest, len(self._load_xrd_data(entry.path)))
                except (ValueError, FileNotFoundError) as e:
                    print(f"Skipping file: {e}")
                    continue
            self.filepaths.append(entry.path)

        if self.max_length is None:
            self.max_length = longest
        print(f"max_length: {self.max_length}")

    def __len__(self):
        return len(self.filepaths)

    def __getitem__(self, idx):
        filepath = self.filepaths[idx]
        xrd_data, name, crystal_system, file_type = self._load_data(filepath)

        xrd_data = self._pad_or_truncate(xrd_data, self.max_length)

        return {
            'xrd_data': xrd_data,
            'name': name,
            'crystal_system': crystal_system,
            'file_type': file_type,
        }

    @staticmethod
    def _read_lines(filepath):
        """Read all lines of a file, replacing undecodable bytes.

        An explicit encoding with ``errors='replace'`` keeps a stray
        non-UTF-8 byte in a header from aborting the whole load.
        """
        with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
            return f.readlines()

    @staticmethod
    def _parse_rows(lines):
        """Parse "angle, intensity" rows until a ``##END=`` line.

        Returns:
            ``(rows, rejected)``: the parsed ``[angle, intensity]`` pairs and
            the raw lines that could not be parsed as exactly two floats.
        """
        rows = []
        rejected = []
        for line in lines:
            if line.startswith("##END="):
                break
            try:
                angle, intensity = map(float, line.strip().split(","))
            except ValueError:
                rejected.append(line)
                continue
            rows.append([angle, intensity])
        return rows, rejected

    def _load_data(self, filepath):
        """Loads data, name, crystal system, and file type from a single file.

        Returns:
            ``(xrd_data, name, crystal_system, file_type)`` where ``xrd_data``
            is a float32 array of shape ``(N, 2)``. If no data row is found,
            an empty ``(0, 2)`` array and three empty strings are returned.
        """
        lines = self._read_lines(filepath)

        name = ""
        crystal_system = ""
        xrd_data_start = -1

        # The file type is encoded in the file name, not in the contents.
        filename = os.path.basename(filepath)
        if "_-_raw" in filename:
            file_type = "raw"
        elif "_-_processed" in filename:
            file_type = "processed"
        else:
            file_type = "unknown"

        # Header lines come first; the first numeric row starts the data.
        for i, line in enumerate(lines):
            if line.startswith("##NAMES="):
                name = line[8:].strip()
            elif line.startswith("##CELL PARAMETERS="):
                match = re.search(r"crystal system:\s*(\w+)", line)
                if match:
                    crystal_system = match.group(1)
            elif self._DATA_LINE_RE.match(line.strip()):
                xrd_data_start = i
                break

        if xrd_data_start == -1:
            print(f"No XRD data found in: {filepath}")
            return np.empty((0, 2), dtype=np.float32), "", "", ""

        rows, rejected = self._parse_rows(lines[xrd_data_start:])
        for bad in rejected:
            print(f"Warning: Skipping malformed line in {filepath}: {bad.strip()}")

        # reshape keeps the (N, 2) layout even if no row survived parsing.
        xrd_data = np.array(rows, dtype=np.float32).reshape(-1, 2)
        return xrd_data, name, crystal_system, file_type

    def _load_xrd_data(self, filepath):
        """Helper function for max_length calculation (loads only XRD data).

        Returns:
            A list of ``[angle, intensity]`` pairs; empty if the file has no
            data row.
        """
        lines = self._read_lines(filepath)

        xrd_data_start = -1
        for i, line in enumerate(lines):
            if self._DATA_LINE_RE.match(line.strip()):
                xrd_data_start = i
                break

        if xrd_data_start == -1:
            return []

        rows, rejected = self._parse_rows(lines[xrd_data_start:])
        for bad in rejected:
            print(f"Skipping malformed line (helper) in {filepath}: {bad}")
        return rows

    def _pad_or_truncate(self, xrd_data, max_length):
        """Pads (with zero rows) or truncates the XRD data to ``max_length`` rows.

        Returns:
            A ``torch.Tensor`` built from the resulting array.
        """
        if len(xrd_data) == 0:
            # Normalise any empty input to (0, 2) so that stacking works.
            xrd_data = np.empty((0, 2), dtype=np.float32)

        if len(xrd_data) > max_length:
            xrd_data = xrd_data[:max_length]
        elif len(xrd_data) < max_length:
            padding = np.zeros((max_length - len(xrd_data), 2), dtype=np.float32)
            xrd_data = np.vstack((xrd_data, padding))
        return torch.tensor(xrd_data)



# --- Example Usage ---
if __name__ == '__main__':
    # Load the scraped files, fixing every pattern to 10000 rows.
    test_dir = './rruff_files'  # Real directory
    dataset = XRDDataset(directory=test_dir, max_length=10000)
    dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

    for i, batch in enumerate(dataloader):
        print(f"\nBatch {i}:")
        print("  XRD Data Batch Shape:", batch['xrd_data'].shape)
        # Report the container type of each string field of the batch.
        name_type = type(batch['name'])
        system_type = type(batch['crystal_system'])
        file_type_type = type(batch['file_type'])
        print(f"  Name Type: {name_type}")
        print(f"  Crystal System Type: {system_type}")
        print(f"  File Type: {file_type_type}")

        # Walk the samples of this batch one by one.
        batch_size = batch['xrd_data'].shape[0]
        for j in range(batch_size):
            print(f"    Sample {j} in Batch {i}:")
            print(f"      Crystal System: {batch['crystal_system'][j]}")
            print(f"      Name: {batch['name'][j]}")
            print(f"      File Type: {batch['file_type'][j]}")

        # Stop after batch index 20 so the demo output stays short.
        if i == 20:
            break

max_length: 10000

Batch 0:
  XRD Data Batch Shape: torch.Size([2, 10000, 2])
  Name Type: <class 'list'>
  Crystal System Type: <class 'list'>
  File Type: <class 'list'>
    Sample 0 in Batch 0:
      Crystal System: monoclinic
      Name: Brewsterite-Sr
      File Type: processed
    Sample 1 in Batch 0:
      Crystal System: triclinic
      Name: Agrellite
      File Type: processed

Batch 1:
  XRD Data Batch Shape: torch.Size([2, 10000, 2])
  Name Type: <class 'list'>
  Crystal System Type: <class 'list'>
  File Type: <class 'list'>
    Sample 0 in Batch 1:
      Crystal System: orthorhombic
      Name: Aikinite
      File Type: raw
    Sample 1 in Batch 1:
      Crystal System: monoclinic
      Name: Akrochordite
      File Type: raw

Batch 2:
  XRD Data Batch Shape: torch.Size([2, 10000, 2])
  Name Type: <class 'list'>
  Crystal System Type: <class 'list'>
  File Type: <class 'list'>
    Sample 0 in Batch 2:
      Crystal System: monoclinic
      Name: Bilinite
      File Type: 

I wish to create Python classes that can represent a crystal system together with its XRD data.
For each experimental sample, a multi_phase class contains an array of tuples (single_phase object, composition object).
A single_phase object has a structure object and a chem_composition object as attributes. A chem_composition object holds many floats,
each corresponding to an element in the periodic table. A structure object contains four string attributes: crystal_system,
bravais_lattice, point_group, and space_group. XRD data are stored in an array of floats.

In [None]:
import numpy as np

class ElementComposition:
    """
    Represents the chemical composition of a material.

    Attributes:
        composition (dict): A dictionary where keys are element symbols (str)
                           and values are their corresponding fractions.
                           The fractions must sum up to 1 (within a small
                           tolerance for floating-point inaccuracies).
    """

    def __init__(self, element_fractions: dict):
        """
        Initializes an ElementComposition object.

        Args:
            element_fractions (dict): A dictionary of element symbols and their fractions.
                                     Example: {"Fe": 0.7, "Ni": 0.3}

        Raises:
            ValueError: If a key is not a string, a fraction is not a number
                or is negative, or the fractions do not sum to about 1.
        """
        # Copy so that later changes to the caller's dict cannot silently
        # break the invariant that was validated here.
        self.composition = dict(element_fractions)
        self._validate_composition()

    def _validate_composition(self):
        """
        Validates that the provided composition is valid (non-negative fractions
        and sums to approximately 1).
        Raises ValueError if the composition is invalid.
        """
        # NumPy scalar types are accepted as well: np.float32 and the NumPy
        # integer types are not subclasses of Python's float/int.
        numeric_types = (int, float, np.integer, np.floating)

        total_fraction = 0.0
        for element, fraction in self.composition.items():
            if not isinstance(element, str):
                raise ValueError(f"Element symbol '{element}' must be a string.")
            if not isinstance(fraction, numeric_types):
                raise ValueError(f"Fraction for element '{element}' must be a number.")
            if fraction < 0:
                raise ValueError(f"Fraction for element '{element}' cannot be negative.")
            total_fraction += fraction

        if not (0.99999 <= total_fraction <= 1.00001):  # Allow for minor floating-point errors
            raise ValueError(f"Element fractions must sum up to (approximately) 1.  Current sum: {total_fraction}")

    def __str__(self):
        """
        Returns a string representation of the composition.
        """
        return ", ".join(f"{element}: {fraction:.3f}" for element, fraction in self.composition.items())

    def __repr__(self):
        """
        Returns a more formal string representation (useful for debugging).
        """
        return f"ElementComposition({self.composition})"

    def get_fraction(self, element: str) -> float:
        """
        Returns the fraction of a given element in the composition.
        Returns 0 if the element is not present.

        Args:
            element (str): The element symbol.

        Returns:
            float: The fraction of the element.
        """
        return self.composition.get(element, 0.0)


class Structure:
    """
    Crystallographic description of a material.

    Attributes:
        crystal_system (str):  Crystal system name (e.g., "cubic", "hexagonal").
        bravais_lattice (str): Bravais lattice label (e.g., "fcc", "bcc").
        point_group (str):     Point group symbol (e.g., "m-3m", "6/mmm").
        space_group (str):     Space group symbol (e.g., "Fm-3m", "P6_3/mmc").
    """

    def __init__(self, crystal_system: str, bravais_lattice: str, point_group: str, space_group: str):
        """
        Store the four descriptors and validate the crystal system.

        Args:
            crystal_system (str): The crystal system.
            bravais_lattice (str): The Bravais lattice.
            point_group (str): The point group.
            space_group (str): The space group.

        Raises:
            ValueError: If the crystal system is not one of the seven known names.
        """
        self.crystal_system = crystal_system
        self.bravais_lattice = bravais_lattice
        self.point_group = point_group
        self.space_group = space_group

        # Reject unknown crystal systems at construction time.
        self._validate_structure()

    def _validate_structure(self):
        """
        Check the crystal system (case-insensitively) against the known names.

        Only the crystal system is checked; Bravais lattice, point group and
        space group are stored as given.
        """
        allowed = [
            "triclinic",
            "monoclinic",
            "orthorhombic",
            "tetragonal",
            "trigonal",
            "hexagonal",
            "cubic",
        ]
        normalized = self.crystal_system.lower()
        if normalized in allowed:
            return
        raise ValueError(
            f"Invalid crystal system: {self.crystal_system}.  Must be one of {allowed}."
        )

    def __str__(self):
        """
        Human-readable, comma-separated summary of the four descriptors.
        """
        fields = (
            f"Crystal System: {self.crystal_system}",
            f"Bravais Lattice: {self.bravais_lattice}",
            f"Point Group: {self.point_group}",
            f"Space Group: {self.space_group}",
        )
        return ", ".join(fields)

    def __repr__(self):
        """
        Constructor-style representation (useful for debugging).
        """
        return (
            f"Structure(crystal_system='{self.crystal_system}', "
            f"bravais_lattice='{self.bravais_lattice}', "
            f"point_group='{self.point_group}', "
            f"space_group='{self.space_group}')"
        )


class SinglePhase:
    """
    A single-phase material: one structure, one composition, optional XRD data.

    Attributes:
        structure (Structure): The crystallographic structure.
        composition (ElementComposition): The chemical composition.
        xrd_data (np.ndarray):  The XRD data, or None if not set.
    """

    def __init__(self, structure: Structure, composition: ElementComposition, xrd_data: np.ndarray = None):
        """
        Build a phase, validating the XRD data when it is supplied.

        Args:
            structure (Structure): The crystallographic structure.
            composition (ElementComposition): The chemical composition.
            xrd_data (np.ndarray, optional):  XRD data; may be left as None and set later.
                                            Either a 2D array [[2theta_1, intensity_1], ...]
                                            or a 1D array of intensities.

        Raises:
            ValueError: If xrd_data is given but has an unsupported shape.
        """
        self.structure = structure
        self.composition = composition
        if xrd_data is None:
            self.xrd_data = None
        else:
            self.xrd_data = self._validate_xrd_data(xrd_data)

    def _validate_xrd_data(self, xrd_data: np.ndarray):
        """
        Convert the input to a NumPy array and check its shape.

        Accepted shapes are 1D (intensities only) and 2D with exactly two
        columns; anything else raises ValueError.
        """
        pattern = np.array(xrd_data)
        is_intensity_vector = pattern.ndim == 1
        is_pair_table = pattern.ndim == 2 and pattern.shape[1] == 2
        if not (is_intensity_vector or is_pair_table):
            raise ValueError("XRD data must be a 1D array of intensities or a 2D array with shape (N, 2) for [2theta, intensity] pairs.")
        return pattern

    def set_xrd_data(self, xrd_data: np.ndarray):
        """
        Replace the phase's XRD data after validating it.

        Args:
            xrd_data (np.ndarray): The XRD data.
        """
        validated = self._validate_xrd_data(xrd_data)
        self.xrd_data = validated

    def __str__(self):
        """Two-line summary: structure, then composition."""
        return f"Structure: {self.structure}\nComposition: {self.composition}"

    def __repr__(self):
        """Debug representation; the XRD array is abbreviated."""
        xrd_marker = 'None' if self.xrd_data is None else '[...]'
        return f"SinglePhase(structure={repr(self.structure)}, composition={repr(self.composition)}, xrd_data={xrd_marker})"


class MultiPhase:
    """
    Represents a multi-phase material, consisting of multiple SinglePhase objects.

    Attributes:
        phases (list): A list of tuples, where each tuple contains a SinglePhase object and its
                       corresponding weight fraction. The weight fractions must sum to 1.
        xrd_data (np.ndarray): XRD data attached to the mixture, or None. It is
                               not filled in automatically by calculate_combined_xrd.
    """

    def __init__(self, phases: list[tuple[SinglePhase, float]], xrd_data: np.ndarray = None):
        """
        Initializes a MultiPhase object.

        Args:
            phases (list): A list of (SinglePhase, weight_fraction) tuples.
            xrd_data (np.ndarray, optional): Combined XRD data. Defaults to None. Can be set later.

        Raises:
            ValueError: If xrd_data has an unsupported shape, or the phases
                are invalid (wrong type, negative fraction, sum not about 1).
        """
        # Materialise the input: a one-shot iterable would otherwise be
        # consumed by validation, and later edits to the caller's list would
        # bypass it.
        self.phases = list(phases)
        self.xrd_data = self._validate_xrd_data(xrd_data) if xrd_data is not None else None
        self._validate_phases()

    def _validate_xrd_data(self, xrd_data: np.ndarray):
        """
        Validates the format of the XRD data.
        Raises ValueError if the data is not in the correct format.

        Returns:
            The data converted to a NumPy array.
        """
        xrd_data = np.array(xrd_data)  # Ensure it's a NumPy array
        is_intensity_vector = xrd_data.ndim == 1
        is_pair_table = xrd_data.ndim == 2 and xrd_data.shape[1] == 2
        if not (is_intensity_vector or is_pair_table):
            raise ValueError("XRD data must be a 1D array of intensities or a 2D array with shape (N, 2) for [2theta, intensity] pairs.")
        return xrd_data

    def _validate_phases(self):
        """
        Validates that the phase fractions sum up to approximately 1.
        Raises ValueError if the fractions are invalid.
        """
        # NumPy scalar types are accepted as well: np.float32 and the NumPy
        # integer types are not subclasses of Python's float/int.
        numeric_types = (int, float, np.integer, np.floating)

        total_fraction = 0.0
        for phase, fraction in self.phases:
            if not isinstance(phase, SinglePhase):
                raise ValueError("Each phase must be a SinglePhase object.")
            if not isinstance(fraction, numeric_types):
                raise ValueError("Phase fractions must be numbers.")
            if fraction < 0:
                raise ValueError("Phase fractions cannot be negative.")
            total_fraction += fraction

        if not (0.99999 <= total_fraction <= 1.00001):  # Allow for minor floating-point errors
            raise ValueError(f"Phase fractions must sum up to (approximately) 1.  Current sum: {total_fraction}")

    def calculate_combined_xrd(self, two_theta: np.ndarray = None):
        """
        Calculates the combined XRD pattern from the individual phases and their fractions.
        Assumes linear combination of intensities.  Requires that each SinglePhase has xrd_data.

        Args:
            two_theta (np.ndarray, optional):  The 2theta values at which to calculate the combined XRD.
                                              If None, the 2theta column of the first phase is used
                                              (which requires that phase to hold 2D data).
        Returns:
            np.ndarray: The combined XRD intensities, one per two_theta value.

        Raises:
            ValueError: If two_theta cannot be determined, a phase has no XRD
                data, or a 1D phase does not match the length of two_theta.
        """
        if two_theta is None:
            first_data = self.phases[0][0].xrd_data if self.phases else None
            # A 1D pattern carries no angles, so it cannot define the grid.
            if first_data is None or first_data.ndim != 2:
                raise ValueError("Cannot determine 2theta values. Please provide a two_theta array.")
            two_theta = first_data[:, 0]

        two_theta = np.asarray(two_theta, dtype=float)
        combined_intensities = np.zeros_like(two_theta, dtype=float)

        for phase, fraction in self.phases:
            data = phase.xrd_data
            if data is None:
                raise ValueError("Cannot calculate combined XRD.  One or more phases are missing XRD data.")

            if data.ndim == 1:
                # Intensities only: they are taken to be sampled on two_theta
                # already, so they are used as-is. Interpolating them onto
                # their own grid would only be an identity for an increasing
                # grid.
                if len(data) != len(two_theta):
                    raise ValueError("The input of phase intensity should have same dimension as the input 2theta array")
                phase_intensities = data
            else:
                # np.interp needs increasing sample points and does not check
                # for it, so sort the phase's points by angle first.
                order = np.argsort(data[:, 0], kind="stable")
                phase_intensities = np.interp(
                    two_theta, data[order, 0], data[order, 1], left=0.0, right=0.0
                )  # zero outside the measured range

            combined_intensities += fraction * phase_intensities

        return combined_intensities

    def __str__(self):
        """Multi-line summary with one numbered line group per phase."""
        phase_strs = [f"  Phase {i+1}: {phase} (Fraction: {fraction:.3f})" for i, (phase, fraction) in enumerate(self.phases)]
        return "MultiPhase:\n" + "\n".join(phase_strs)

    def __repr__(self):
        """Debug representation; the XRD array is abbreviated."""
        return f"MultiPhase(phases=[{', '.join(f'({repr(phase)}, {fraction})' for phase, fraction in self.phases)}], xrd_data={'[...]' if self.xrd_data is not None else 'None'})"

    def set_xrd_data(self, xrd_data: np.ndarray):
        """
        Sets or updates the XRD data for the MultiPhase material.

        Args:
            xrd_data (np.ndarray): The XRD data.
        """
        self.xrd_data = self._validate_xrd_data(xrd_data)

# --- Example Usage ---
if __name__ == '__main__':
    # Demo / smoke test for ElementComposition, Structure, SinglePhase and
    # MultiPhase. Every step prints its result; the blocks that are expected
    # to fail catch ValueError and print the message instead.

    # Example 1: a single phase with 2D [2theta, intensity] XRD data.
    comp1 = ElementComposition({"Fe": 0.7, "Ni": 0.3})
    struct1 = Structure("cubic", "fcc", "m-3m", "Fm-3m")
    xrd1 = np.array([[20.0, 100.0], [30.0, 50.0], [40.0, 200.0]])  # three [2theta, intensity] rows
    phase1 = SinglePhase(struct1, comp1, xrd1)

    print("--- Single Phase Example ---")
    print(phase1)
    print(f"XRD Data (first 3 points):\n{phase1.xrd_data[:3]}")
    print(repr(phase1))


    # Example 2: a second phase, later mixed with the first one.
    comp2 = ElementComposition({"Al": 0.9, "O": 0.1})
    struct2 = Structure("hexagonal", "hcp", "6/mmm", "P6_3/mmc")
    xrd2 = np.array([[25.0, 80.0], [35.0, 120.0], [45.0, 150.0]])
    phase2 = SinglePhase(struct2, comp2, xrd2)

    # A third phase created without XRD data (xrd_data stays None).
    comp3 = ElementComposition({"Si":0.5, "O":0.5})
    struct3 = Structure("tetragonal", "p4mm", "4/mmm", "I4/mmm")
    phase3 = SinglePhase(struct3, comp3)
    print("\n--- Single Phase without XRD data ---") # show str/repr when no XRD data is attached
    print(phase3)
    print(repr(phase3))

    multi_phase = MultiPhase([(phase1, 0.6), (phase2, 0.4)])
    print("\n--- Multi-Phase Example ---")
    print(multi_phase)
    print(repr(multi_phase))

    # Combined pattern on the 2theta grid taken from the first phase.
    combined_xrd = multi_phase.calculate_combined_xrd() # no two_theta given: grid comes from phase1
    print("\n--- Combined XRD (calculated) ---")
    print(combined_xrd)

    # Combined pattern on an explicitly supplied 2theta grid.
    two_theta_custom = np.array([20.0, 25.0, 30.0, 35.0, 40.0, 45.0])
    combined_xrd_custom = multi_phase.calculate_combined_xrd(two_theta_custom)
    print("\n--- Combined XRD (with specified 2theta): ---")
    print(combined_xrd_custom)

    # Example 3: error handling. Each block below is expected to raise
    # ValueError, which is caught and printed.
    try:
        invalid_comp = ElementComposition({"Fe": 0.7, "Ni": 0.5})  # fractions sum to 1.2
    except ValueError as e:
        print(f"\nError creating composition: {e}")

    # phase3 has no XRD data, so combining it must fail.
    try:
      multi_phase_test = MultiPhase([(phase1, 0.6), (phase3, 0.4)])
      combined_xrd = multi_phase_test.calculate_combined_xrd(two_theta_custom)
    except ValueError as e:
      print(f"\nError calculating combined xrd:{e}")

    # Phase fractions sum to 1.2.
    try:
      invalid_multi = MultiPhase([(phase1, 0.6), (phase2, 0.6)])
    except ValueError as e:
        print(f"\nError creating multi-phase: {e}")

    # "invalid" is not a known crystal system.
    try:
        invalid_struct = Structure("invalid", "fcc", "m-3m", "Fm-3m")
    except ValueError as e:
        print(f"\nError creating structure: {e}")

    try:
        xrd_invalid = np.array([[[1,2,3]],[[4,5,6]]]) # 3D array: not a valid XRD shape
        phase_invalid_xrd = SinglePhase(struct1, comp1, xrd_invalid)
    except ValueError as e:
        print(f"\nError creating single phase with invalid xrd: {e}")

    # The same invalid array, this time assigned to an existing MultiPhase.
    try:
        xrd_invalid = np.array([[[1,2,3]],[[4,5,6]]])
        multi_phase.set_xrd_data(xrd_invalid)
    except ValueError as e:
       print(f"\nError setting invalid xrd data to exist MultiPhase obj: {e}")
    # A 1D array of intensities is also accepted as XRD data.
    print("\n---test 1D intensity input---")
    xrd_1d = np.array([10,25,46,100,2]) # intensities only, no 2theta column
    phase_1d_xrd = SinglePhase(struct1, comp1, xrd_1d)
    print(phase_1d_xrd.xrd_data) # show the stored array
    # Combine a 1D-intensity phase with a 2D phase on a caller-supplied grid.
    multi_phase_1d_test = MultiPhase([(phase_1d_xrd, 0.6), (phase2, 0.4)])
    combined_xrd = multi_phase_1d_test.calculate_combined_xrd(np.linspace(1,5,5)) # the grid must have as many points as the 1D intensity array
    print(combined_xrd)